shim_init.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_init.c
  15. *
  16. * This file contains entry and exit functions of library OS.
  17. */
  18. #include <shim_internal.h>
  19. #include <shim_table.h>
  20. #include <shim_tls.h>
  21. #include <shim_thread.h>
  22. #include <shim_handle.h>
  23. #include <shim_vma.h>
  24. #include <shim_checkpoint.h>
  25. #include <shim_fs.h>
  26. #include <shim_ipc.h>
  27. #include <shim_profile.h>
  28. #include <pal.h>
  29. #include <pal_debug.h>
  30. #include <pal_error.h>
  31. #include <sys/mman.h>
  32. #include <asm/unistd.h>
  33. #include <asm/fcntl.h>
  34. unsigned long allocsize;
  35. unsigned long allocshift;
  36. unsigned long allocmask;
  37. /* The following constants will help matching glibc version with compatible
  38. SHIM libraries */
  39. #include "glibc-version.h"
  40. const unsigned int glibc_version = GLIBC_VERSION;
  41. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  42. {
  43. shim_get_tls()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  44. }
  45. noreturn void __abort(void) {
  46. PAUSE();
  47. shim_terminate(-ENOTRECOVERABLE);
  48. }
  49. void warn (const char *format, ...)
  50. {
  51. va_list args;
  52. va_start (args, format);
  53. __SYS_VPRINTF(format, &args);
  54. va_end (args);
  55. }
  56. void __stack_chk_fail (void)
  57. {
  58. }
  59. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  60. /* reserved */ 0,
  61. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  62. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  63. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  64. /* PAL_ERROR_INVAL */ EINVAL,
  65. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  66. /* PAL_ERROR_DENIED */ EACCES,
  67. /* PAL_ERROR_BADHANDLE */ EFAULT,
  68. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  69. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  70. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  71. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  72. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  73. /* PAL_ERROR_INTERRUPTED */ EINTR,
  74. /* PAL_ERROR_OVERFLOW */ EFAULT,
  75. /* PAL_ERROR_BADADDR */ EFAULT,
  76. /* PAL_ERROR_NOMEM */ ENOMEM,
  77. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  78. /* PAL_ERROR_INCONSIST */ EFAULT,
  79. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  80. /* PAL_ERROR_ENDOFSTREAM */ 0,
  81. /* PAL_ERROR_NOTSERVER */ EINVAL,
  82. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  83. /* PAL_ERROR_ZEROSIZE */ 0,
  84. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  85. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  86. };
  87. long convert_pal_errno (long err)
  88. {
  89. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  90. pal_errno_to_unix_errno[err] : 0;
  91. }
  92. unsigned long parse_int (const char * str)
  93. {
  94. unsigned long num = 0;
  95. int radix = 10;
  96. char c;
  97. if (str[0] == '0') {
  98. str++;
  99. radix = 8;
  100. if (str[0] == 'x') {
  101. str++;
  102. radix = 16;
  103. }
  104. }
  105. while ((c = *(str++))) {
  106. int val;
  107. if (c >= 'A' && c <= 'F')
  108. val = c - 'A' + 10;
  109. else if (c >= 'a' && c <= 'f')
  110. val = c - 'a' + 10;
  111. else if (c >= '0' && c <= '9')
  112. val = c - '0';
  113. else
  114. break;
  115. if (val >= radix)
  116. break;
  117. num = num * radix + val;
  118. }
  119. if (c == 'G' || c == 'g')
  120. num *= 1024 * 1024 * 1024;
  121. else if (c == 'M' || c == 'm')
  122. num *= 1024 * 1024;
  123. else if (c == 'K' || c == 'k')
  124. num *= 1024;
  125. return num;
  126. }
  127. long int glibc_option (const char * opt)
  128. {
  129. char cfg[CONFIG_MAX];
  130. if (strcmp_static(opt, "heap_size")) {
  131. ssize_t ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
  132. if (ret <= 0) {
  133. debug("no glibc option: %s (err=%ld)\n", opt, ret);
  134. return -ENOENT;
  135. }
  136. long int heap_size = parse_int(cfg);
  137. debug("glibc option: heap_size = %ld\n", heap_size);
  138. return (long int) heap_size;
  139. }
  140. return -EINVAL;
  141. }
  142. void * migrated_memory_start;
  143. void * migrated_memory_end;
  144. void * migrated_shim_addr;
  145. const char ** initial_envp __attribute_migratable;
  146. /* library_paths is populated with LD_PRELOAD entries once during LibOS
  147. * initialization and is used in __load_interp_object() to search for ELF
  148. * program interpreter in specific paths. Once allocated, its memory is
  149. * never freed or updated. */
  150. char ** library_paths = NULL;
  151. struct shim_lock __master_lock;
  152. bool lock_enabled;
  153. void init_tcb (shim_tcb_t * tcb)
  154. {
  155. tcb->canary = SHIM_TLS_CANARY;
  156. tcb->self = tcb;
  157. }
  158. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  159. {
  160. memset(new_tcb, 0, sizeof(shim_tcb_t));
  161. new_tcb->canary = SHIM_TLS_CANARY;
  162. new_tcb->self = new_tcb;
  163. new_tcb->tp = old_tcb->tp;
  164. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  165. new_tcb->tid = old_tcb->tid;
  166. new_tcb->debug_buf = old_tcb->debug_buf;
  167. }
  168. /* This function is used to allocate tls before interpreter start running */
  169. void allocate_tls (__libc_tcb_t * tcb, bool user, struct shim_thread * thread)
  170. {
  171. assert(tcb);
  172. tcb->tcb = tcb;
  173. init_tcb(&tcb->shim_tcb);
  174. if (thread) {
  175. thread->tcb = tcb;
  176. thread->user_tcb = user;
  177. tcb->shim_tcb.tp = thread;
  178. tcb->shim_tcb.tid = thread->tid;
  179. } else {
  180. tcb->shim_tcb.tp = NULL;
  181. tcb->shim_tcb.tid = 0;
  182. }
  183. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  184. assert(shim_tls_check_canary());
  185. }
  186. void populate_tls (__libc_tcb_t * tcb, bool user)
  187. {
  188. assert(tcb);
  189. tcb->tcb = tcb;
  190. copy_tcb(&tcb->shim_tcb, shim_get_tls());
  191. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  192. if (thread) {
  193. thread->tcb = tcb;
  194. thread->user_tcb = user;
  195. }
  196. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  197. assert(shim_tls_check_canary());
  198. }
  199. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  200. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  201. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  202. void * allocate_stack (size_t size, size_t protect_size, bool user)
  203. {
  204. size = ALIGN_UP(size);
  205. protect_size = ALIGN_UP(protect_size);
  206. /* preserve a non-readable, non-writable page below the user
  207. stack to stop user program to clobber other vmas */
  208. void * stack = NULL;
  209. int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);
  210. if (user) {
  211. stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
  212. flags, NULL, 0, "stack");
  213. if (!stack)
  214. return NULL;
  215. stack = (void *)
  216. DkVirtualMemoryAlloc(stack, size + protect_size,
  217. 0, PAL_PROT_NONE);
  218. } else {
  219. stack = system_malloc(size + protect_size);
  220. }
  221. if (!stack)
  222. return NULL;
  223. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  224. INC_PROFILE_OCCURENCE(alloc_stack_count);
  225. stack += protect_size;
  226. // Ensure proper alignment for process' initial stack pointer value.
  227. stack += (16 - (uintptr_t)stack % 16) % 16;
  228. DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
  229. if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
  230. return NULL;
  231. debug("allocated stack at %p (size = %ld)\n", stack, size);
  232. return stack;
  233. }
  234. static int populate_user_stack (void * stack, size_t stack_size,
  235. elf_auxv_t ** auxpp, int ** argcpp,
  236. const char *** argvp, const char *** envpp)
  237. {
  238. const int argc = **argcpp;
  239. const char ** argv = *argvp, ** envp = *envpp;
  240. const char ** new_argv = NULL, ** new_envp = NULL;
  241. elf_auxv_t *new_auxp = NULL;
  242. void * stack_bottom = stack;
  243. void * stack_top = stack + stack_size;
  244. #define ALLOCATE_TOP(size) \
  245. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  246. stack_top; })
  247. #define ALLOCATE_BOTTOM(size) \
  248. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  249. stack_bottom - (size); })
  250. /* ld.so expects argc as long on stack, not int. */
  251. long * argcp = ALLOCATE_BOTTOM(sizeof(long));
  252. *argcp = **argcpp;
  253. if (!argv) {
  254. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  255. goto copy_envp;
  256. }
  257. new_argv = stack_bottom;
  258. while (argv) {
  259. for (const char ** a = argv ; *a ; a++) {
  260. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  261. int len = strlen(*a) + 1;
  262. char * abuf = ALLOCATE_TOP(len);
  263. memcpy(abuf, *a, len);
  264. *t = abuf;
  265. }
  266. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  267. copy_envp:
  268. if (!envp)
  269. break;
  270. new_envp = stack_bottom;
  271. argv = envp;
  272. envp = NULL;
  273. }
  274. if (!new_envp)
  275. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  276. /* reserve space for ELF aux vectors, populated later by LibOS */
  277. new_auxp = ALLOCATE_BOTTOM(REQUIRED_ELF_AUXV * sizeof(elf_auxv_t) +
  278. REQUIRED_ELF_AUXV_SPACE);
  279. /* x86_64 ABI requires 16 bytes alignment on stack on every function
  280. call. */
  281. size_t move_size = stack_bottom - stack;
  282. *argcpp = stack_top - move_size;
  283. *argcpp = ALIGN_DOWN_PTR(*argcpp, 16UL);
  284. **argcpp = argc;
  285. size_t shift = (void*)(*argcpp) - stack;
  286. memmove(*argcpp, stack, move_size);
  287. *argvp = new_argv ? (void *) new_argv + shift : NULL;
  288. *envpp = new_envp ? (void *) new_envp + shift : NULL;
  289. *auxpp = new_auxp ? (void *) new_auxp + shift : NULL;
  290. /* clear working area at the bottom */
  291. memset(stack, 0, shift);
  292. return 0;
  293. }
  294. unsigned long sys_stack_size = 0;
  295. int init_stack (const char ** argv, const char ** envp,
  296. int ** argcpp, const char *** argpp,
  297. elf_auxv_t ** auxpp)
  298. {
  299. if (!sys_stack_size) {
  300. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  301. if (root_config) {
  302. char stack_cfg[CONFIG_MAX];
  303. if (get_config(root_config, "sys.stack.size", stack_cfg,
  304. CONFIG_MAX) > 0)
  305. sys_stack_size = ALIGN_UP(parse_int(stack_cfg));
  306. }
  307. }
  308. struct shim_thread * cur_thread = get_cur_thread();
  309. if (!cur_thread || cur_thread->stack)
  310. return 0;
  311. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  312. if (!stack)
  313. return -ENOMEM;
  314. if (initial_envp)
  315. envp = initial_envp;
  316. int ret = populate_user_stack(stack, sys_stack_size, auxpp, argcpp, &argv, &envp);
  317. if (ret < 0)
  318. return ret;
  319. *argpp = argv;
  320. initial_envp = envp;
  321. cur_thread->stack_top = stack + sys_stack_size;
  322. cur_thread->stack = stack;
  323. cur_thread->stack_red = stack - allocsize;
  324. return 0;
  325. }
  326. int read_environs (const char ** envp)
  327. {
  328. for (const char ** e = envp ; *e ; e++) {
  329. if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
  330. /* populate library_paths with entries from LD_LIBRARY_PATH envvar */
  331. const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
  332. size_t npaths = 2; // One for the first entry, one for the last
  333. // NULL.
  334. for (const char * tmp = s ; *tmp ; tmp++)
  335. if (*tmp == ':')
  336. npaths++;
  337. char** paths = malloc(sizeof(const char *) *
  338. npaths);
  339. if (!paths)
  340. return -ENOMEM;
  341. size_t cnt = 0;
  342. while (*s) {
  343. const char * next;
  344. for (next = s ; *next && *next != ':' ; next++);
  345. size_t len = next - s;
  346. char * str = malloc(len + 1);
  347. if (!str) {
  348. for (size_t i = 0; i < cnt; i++)
  349. free(paths[i]);
  350. free(paths);
  351. return -ENOMEM;
  352. }
  353. memcpy(str, s, len);
  354. str[len] = 0;
  355. paths[cnt++] = str;
  356. s = *next ? next + 1 : next;
  357. }
  358. paths[cnt] = NULL;
  359. assert(!library_paths);
  360. library_paths = paths;
  361. return 0;
  362. }
  363. }
  364. return 0;
  365. }
  366. struct config_store * root_config = NULL;
  367. static void * __malloc (size_t size)
  368. {
  369. return malloc(size);
  370. }
  371. static void __free (void * mem)
  372. {
  373. free(mem);
  374. }
  375. int init_manifest (PAL_HANDLE manifest_handle)
  376. {
  377. int ret = 0;
  378. void * addr = NULL;
  379. size_t size = 0, map_size = 0;
  380. #define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
  381. if (PAL_CB(manifest_preload.start)) {
  382. addr = PAL_CB(manifest_preload.start);
  383. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  384. } else {
  385. PAL_STREAM_ATTR attr;
  386. if (!DkStreamAttributesQueryByHandle(manifest_handle, &attr))
  387. return -PAL_ERRNO;
  388. size = attr.pending_size;
  389. map_size = ALIGN_UP(size);
  390. addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
  391. NULL, 0, "manifest");
  392. if (!addr)
  393. return -ENOMEM;
  394. void * ret_addr = DkStreamMap(manifest_handle, addr,
  395. PAL_PROT_READ, 0,
  396. ALIGN_UP(size));
  397. if (!ret_addr) {
  398. bkeep_munmap(addr, map_size, MAP_FLAGS);
  399. return -ENOMEM;
  400. } else {
  401. assert(addr == ret_addr);
  402. }
  403. }
  404. struct config_store * new_root_config = malloc(sizeof(struct config_store));
  405. if (!new_root_config) {
  406. ret = -ENOMEM;
  407. goto fail;
  408. }
  409. new_root_config->raw_data = addr;
  410. new_root_config->raw_size = size;
  411. new_root_config->malloc = __malloc;
  412. new_root_config->free = __free;
  413. const char * errstring = "Unexpected error";
  414. if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
  415. SYS_PRINTF("Unable to read manifest file: %s\n", errstring);
  416. goto fail;
  417. }
  418. root_config = new_root_config;
  419. return 0;
  420. fail:
  421. if (map_size) {
  422. DkStreamUnmap(addr, map_size);
  423. if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
  424. BUG();
  425. }
  426. free(new_root_config);
  427. return ret;
  428. }
  429. #ifdef PROFILE
  430. struct shim_profile profile_root;
  431. #endif
  432. # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
  433. do { \
  434. void *_tmp = (cookie); \
  435. (argv) = _tmp; \
  436. _tmp += sizeof(char *) * ((argc) + 1); \
  437. (envp) = _tmp; \
  438. for ( ; *(char **) _tmp; _tmp += sizeof(char *)); \
  439. (auxp) = _tmp + sizeof(char *); \
  440. } while (0)
  441. #ifdef PROFILE
  442. static void set_profile_enabled (const char ** envp)
  443. {
  444. const char ** p;
  445. for (p = envp ; (*p) ; p++)
  446. if (strpartcmp_static(*p, "PROFILE_ENABLED="))
  447. break;
  448. if (!(*p))
  449. return;
  450. for (int i = 0 ; i < N_PROFILE ; i++)
  451. PROFILES[i].disabled = true;
  452. const char * str = (*p) + 16;
  453. bool enabled = false;
  454. while (*str) {
  455. const char * next = str;
  456. for ( ; (*next) && (*next) != ',' ; next++);
  457. if (next > str) {
  458. int len = next - str;
  459. for (int i = 0 ; i < N_PROFILE ; i++) {
  460. struct shim_profile * profile = &PROFILES[i];
  461. if (!memcmp(profile->name, str, len) && !profile->name[len]) {
  462. profile->disabled = false;
  463. if (profile->type == CATEGORY)
  464. enabled = true;
  465. }
  466. }
  467. }
  468. str = (*next) ? next + 1 : next;
  469. }
  470. while (enabled) {
  471. enabled = false;
  472. for (int i = 0 ; i < N_PROFILE ; i++) {
  473. struct shim_profile * profile = &PROFILES[i];
  474. if (!profile->disabled || profile->root == &profile_)
  475. continue;
  476. if (!profile->root->disabled) {
  477. profile->disabled = false;
  478. if (profile->type == CATEGORY)
  479. enabled = true;
  480. }
  481. }
  482. }
  483. for (int i = 0 ; i < N_PROFILE ; i++) {
  484. struct shim_profile * profile = &PROFILES[i];
  485. if (profile->type == CATEGORY || profile->disabled)
  486. continue;
  487. for (profile = profile->root ;
  488. profile != &profile_ && profile->disabled ;
  489. profile = profile->root)
  490. profile->disabled = false;
  491. }
  492. }
  493. #endif
  494. static int init_newproc (struct newproc_header * hdr)
  495. {
  496. BEGIN_PROFILE_INTERVAL();
  497. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  498. sizeof(struct newproc_header), hdr,
  499. NULL, 0);
  500. if (!bytes)
  501. return -PAL_ERRNO;
  502. SAVE_PROFILE_INTERVAL(child_wait_header);
  503. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  504. return hdr->failure;
  505. }
  506. DEFINE_PROFILE_CATEGORY(pal, );
  507. DEFINE_PROFILE_INTERVAL(pal_startup_time, pal);
  508. DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
  509. DEFINE_PROFILE_INTERVAL(pal_relocation_time, pal);
  510. DEFINE_PROFILE_INTERVAL(pal_linking_time, pal);
  511. DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time, pal);
  512. DEFINE_PROFILE_INTERVAL(pal_allocation_time, pal);
  513. DEFINE_PROFILE_INTERVAL(pal_tail_startup_time, pal);
  514. DEFINE_PROFILE_INTERVAL(pal_child_creation_time, pal);
  515. DEFINE_PROFILE_CATEGORY(init, );
  516. DEFINE_PROFILE_INTERVAL(init_vma, init);
  517. DEFINE_PROFILE_INTERVAL(init_slab, init);
  518. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  519. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  520. DEFINE_PROFILE_INTERVAL(init_fs, init);
  521. DEFINE_PROFILE_INTERVAL(init_dcache, init);
  522. DEFINE_PROFILE_INTERVAL(init_handle, init);
  523. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  524. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  525. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  526. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  527. DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file, init);
  528. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  529. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  530. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  531. DEFINE_PROFILE_INTERVAL(init_thread, init);
  532. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  533. DEFINE_PROFILE_INTERVAL(init_mount, init);
  534. DEFINE_PROFILE_INTERVAL(init_async, init);
  535. DEFINE_PROFILE_INTERVAL(init_stack, init);
  536. DEFINE_PROFILE_INTERVAL(read_environs, init);
  537. DEFINE_PROFILE_INTERVAL(init_loader, init);
  538. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  539. DEFINE_PROFILE_INTERVAL(init_signal, init);
  540. #define CALL_INIT(func, args ...) func(args)
  541. #define RUN_INIT(func, ...) \
  542. do { \
  543. int _err = CALL_INIT(func, ##__VA_ARGS__); \
  544. if (_err < 0) { \
  545. SYS_PRINTF("shim_init() in " #func " (%d)\n", _err); \
  546. shim_terminate(_err); \
  547. } \
  548. SAVE_PROFILE_INTERVAL(func); \
  549. } while (0)
  550. extern PAL_HANDLE thread_start_event;
  551. noreturn void* shim_init (int argc, void * args)
  552. {
  553. debug_handle = PAL_CB(debug_stream);
  554. cur_process.vmid = (IDTYPE) PAL_CB(process_id);
  555. /* create the initial TCB, shim can not be run without a tcb */
  556. __libc_tcb_t tcb;
  557. memset(&tcb, 0, sizeof(__libc_tcb_t));
  558. allocate_tls(&tcb, false, NULL);
  559. __disable_preempt(&tcb.shim_tcb); // Temporarily disable preemption for delaying any signal
  560. // that arrives during initialization
  561. debug_setbuf(&tcb.shim_tcb, true);
  562. debug("set tcb to %p\n", &tcb);
  563. #ifdef PROFILE
  564. unsigned long begin_time = GET_PROFILE_INTERVAL();
  565. #endif
  566. debug("host: %s\n", PAL_CB(host_type));
  567. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE);
  568. allocsize = PAL_CB(alloc_align);
  569. allocshift = allocsize - 1;
  570. allocmask = ~allocshift;
  571. create_lock(&__master_lock);
  572. int * argcp = &argc;
  573. const char ** argv, ** envp, ** argp = NULL;
  574. elf_auxv_t * auxp;
  575. /* call to figure out where the arguments are */
  576. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  577. #ifdef PROFILE
  578. set_profile_enabled(envp);
  579. #endif
  580. struct newproc_header hdr;
  581. void * cpaddr = NULL;
  582. #ifdef PROFILE
  583. unsigned long begin_create_time = 0;
  584. #endif
  585. BEGIN_PROFILE_INTERVAL();
  586. RUN_INIT(init_vma);
  587. RUN_INIT(init_slab);
  588. RUN_INIT(read_environs, envp);
  589. RUN_INIT(init_str_mgr);
  590. RUN_INIT(init_internal_map);
  591. RUN_INIT(init_fs);
  592. RUN_INIT(init_dcache);
  593. RUN_INIT(init_handle);
  594. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  595. if (argc && argv[0][0] == '-') {
  596. if (strcmp_static(argv[0], "-resume") && argc >= 2) {
  597. const char * filename = *(argv + 1);
  598. argc -= 2;
  599. argv += 2;
  600. RUN_INIT(init_mount_root);
  601. RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
  602. &cpaddr);
  603. goto restore;
  604. }
  605. }
  606. if (PAL_CB(parent_process)) {
  607. RUN_INIT(init_newproc, &hdr);
  608. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  609. hdr.create_time, begin_time);
  610. #ifdef PROFILE
  611. begin_create_time = hdr.begin_create_time;
  612. #endif
  613. if (hdr.checkpoint.hdr.size)
  614. RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
  615. }
  616. if (cpaddr) {
  617. restore:
  618. thread_start_event = DkNotificationEventCreate(PAL_FALSE);
  619. RUN_INIT(restore_checkpoint,
  620. &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
  621. (ptr_t) cpaddr, 0);
  622. }
  623. if (PAL_CB(manifest_handle))
  624. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  625. RUN_INIT(init_mount_root);
  626. RUN_INIT(init_ipc);
  627. RUN_INIT(init_thread);
  628. RUN_INIT(init_mount);
  629. RUN_INIT(init_important_handles);
  630. RUN_INIT(init_async);
  631. RUN_INIT(init_stack, argv, envp, &argcp, &argp, &auxp);
  632. RUN_INIT(init_loader);
  633. RUN_INIT(init_ipc_helper);
  634. RUN_INIT(init_signal);
  635. if (PAL_CB(parent_process)) {
  636. /* Notify the parent process */
  637. struct newproc_response res;
  638. res.child_vmid = cur_process.vmid;
  639. res.failure = 0;
  640. if (!DkStreamWrite(PAL_CB(parent_process), 0,
  641. sizeof(struct newproc_response),
  642. &res, NULL))
  643. shim_do_exit(-PAL_ERRNO);
  644. }
  645. debug("shim process initialized\n");
  646. #ifdef PROFILE
  647. if (begin_create_time)
  648. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  649. begin_create_time);
  650. #endif
  651. SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
  652. SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
  653. pal_control.host_specific_startup_time);
  654. SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
  655. pal_control.relocation_time);
  656. SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
  657. SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
  658. pal_control.manifest_loading_time);
  659. SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
  660. pal_control.allocation_time);
  661. SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
  662. pal_control.tail_startup_time);
  663. SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
  664. pal_control.child_creation_time);
  665. if (thread_start_event)
  666. DkEventSet(thread_start_event);
  667. shim_tcb_t * cur_tcb = shim_get_tls();
  668. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  669. if (cur_tcb->context.sp)
  670. restore_context(&cur_tcb->context);
  671. if (cur_thread->exec)
  672. execute_elf_object(cur_thread->exec, argcp, argp, auxp);
  673. shim_do_exit(0);
  674. }
  675. static int create_unique (int (*mkname) (char *, size_t, void *),
  676. int (*create) (const char *, void *),
  677. int (*output) (char *, size_t, const void *,
  678. struct shim_qstr *),
  679. char * name, size_t size, void * id, void * obj,
  680. struct shim_qstr * qstr)
  681. {
  682. int ret, len;
  683. while (1) {
  684. len = mkname(name, size, id);
  685. if (len < 0)
  686. return len;
  687. if ((ret = create(name, obj)) < 0)
  688. return ret;
  689. if (ret)
  690. continue;
  691. if (output)
  692. return output(name, size, id, qstr);
  693. if (qstr)
  694. qstrsetstr(qstr, name, len);
  695. return len;
  696. }
  697. }
  698. static int name_pipe (char * uri, size_t size, void * id)
  699. {
  700. IDTYPE pipeid;
  701. int len;
  702. int ret = DkRandomBitsRead(&pipeid, sizeof(pipeid));
  703. if (ret < 0)
  704. return -convert_pal_errno(-ret);
  705. debug("creating pipe: pipe.srv:%u\n", pipeid);
  706. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  707. return -ERANGE;
  708. *((IDTYPE *) id) = pipeid;
  709. return len;
  710. }
  711. static int open_pipe (const char * uri, void * obj)
  712. {
  713. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  714. if (!pipe)
  715. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  716. -PAL_ERRNO;
  717. if (obj)
  718. *((PAL_HANDLE *) obj) = pipe;
  719. else
  720. DkObjectClose(pipe);
  721. return 0;
  722. }
  723. static int pipe_addr (char * uri, size_t size, const void * id,
  724. struct shim_qstr * qstr)
  725. {
  726. IDTYPE pipeid = *((IDTYPE *) id);
  727. int len;
  728. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  729. return -ERANGE;
  730. if (qstr)
  731. qstrsetstr(qstr, uri, len);
  732. return len;
  733. }
  734. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  735. struct shim_qstr * qstr)
  736. {
  737. IDTYPE pipeid;
  738. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  739. uri, size, &pipeid, hdl, qstr);
  740. if (ret > 0 && id)
  741. *id = pipeid;
  742. return ret;
  743. }
  744. static int name_path (char * path, size_t size, void * id)
  745. {
  746. unsigned int suffix;
  747. int prefix_len = strlen(path);
  748. int len;
  749. int ret = DkRandomBitsRead(&suffix, sizeof(suffix));
  750. if (ret < 0)
  751. return -convert_pal_errno(-ret);
  752. len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
  753. if (len == size)
  754. return -ERANGE;
  755. *((unsigned int *) id) = suffix;
  756. return prefix_len + len;
  757. }
  758. static int open_dir (const char * path, void * obj)
  759. {
  760. struct shim_handle * dir = NULL;
  761. if (obj) {
  762. dir = get_new_handle();
  763. if (!dir)
  764. return -ENOMEM;
  765. }
  766. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  767. NULL);
  768. if (ret < 0)
  769. return ret = -EEXIST ? 1 : ret;
  770. if (obj)
  771. *((struct shim_handle **) obj) = dir;
  772. return 0;
  773. }
  774. static int open_file (const char * path, void * obj)
  775. {
  776. struct shim_handle * file = NULL;
  777. if (obj) {
  778. file = get_new_handle();
  779. if (!file)
  780. return -ENOMEM;
  781. }
  782. int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
  783. NULL);
  784. if (ret < 0)
  785. return ret = -EEXIST ? 1 : ret;
  786. if (obj)
  787. *((struct shim_handle **) obj) = file;
  788. return 0;
  789. }
  790. static int open_pal_handle (const char * uri, void * obj)
  791. {
  792. PAL_HANDLE hdl;
  793. if (strpartcmp_static(uri, "dev:"))
  794. hdl = DkStreamOpen(uri, 0,
  795. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  796. PAL_SHARE_OWNER_R,
  797. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  798. 0);
  799. else
  800. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  801. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  802. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  803. 0);
  804. if (!hdl) {
  805. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  806. return 0;
  807. else
  808. return -PAL_ERRNO;
  809. }
  810. if (obj) {
  811. *((PAL_HANDLE *) obj) = hdl;
  812. } else {
  813. DkObjectClose(hdl);
  814. }
  815. return 0;
  816. }
  /*
   * Output callback: report the generated path.
   *
   * path - NUL-terminated generated name
   * size - unused
   * id   - unused
   * qstr - optional; when non-NULL the path is copied into it with qstrsetstr()
   *
   * Returns the length of `path`.
   */
  static int output_path(char *path, size_t size, const void *id,
                         struct shim_qstr *qstr)
  {
      const int path_len = strlen(path);

      if (!qstr)
          return path_len;

      qstrsetstr(qstr, path, path_len);
      return path_len;
  }
  825. int create_dir (const char * prefix, char * path, size_t size,
  826. struct shim_handle ** hdl)
  827. {
  828. unsigned int suffix;
  829. if (prefix) {
  830. int len = strlen(prefix);
  831. if (len >= size)
  832. return -ERANGE;
  833. memcpy(path, prefix, len + 1);
  834. }
  835. return create_unique(&name_path, &open_dir, &output_path, path, size,
  836. &suffix, hdl, NULL);
  837. }
  838. int create_file (const char * prefix, char * path, size_t size,
  839. struct shim_handle ** hdl)
  840. {
  841. unsigned int suffix;
  842. if (prefix) {
  843. int len = strlen(prefix);
  844. if (len >= size)
  845. return -ERANGE;
  846. memcpy(path, prefix, len + 1);
  847. }
  848. return create_unique(&name_path, &open_file, &output_path, path, size,
  849. &suffix, hdl, NULL);
  850. }
  851. int create_handle (const char * prefix, char * uri, size_t size,
  852. PAL_HANDLE * hdl, unsigned int * id)
  853. {
  854. unsigned int suffix;
  855. if (prefix) {
  856. int len = strlen(prefix);
  857. if (len >= size)
  858. return -ERANGE;
  859. memcpy(uri, prefix, len + 1);
  860. }
  861. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  862. id ? : &suffix, hdl, NULL);
  863. }
  864. void check_stack_hook (void)
  865. {
  866. struct shim_thread * cur_thread = get_cur_thread();
  867. void * rsp;
  868. __asm__ volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  869. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  870. if (rsp - cur_thread->stack < PAL_CB(pagesize))
  871. SYS_PRINTF("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  872. rsp, cur_thread->stack, cur_thread->stack_top);
  873. } else {
  874. SYS_PRINTF("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  875. rsp, cur_thread->stack, cur_thread->stack_top);
  876. }
  877. }
  #ifdef PROFILE
  /* Emit one indentation unit per nesting level. */
  static void print_profile_indent (PAL_HANDLE hdl, int level)
  {
      for (int j = 0 ; j < level ; j++)
          __SYS_FPRINTF(hdl, " ");
  }

  /*
   * Print every enabled entry of PROFILES whose parent is `root`.
   *
   * hdl   - stream the report is written to
   * root  - parent entry whose children are printed
   * level - nesting depth, used for indentation
   *
   * OCCURENCE and INTERVAL entries are printed only when their count is
   * non-zero.  CATEGORY entries are printed and then descended into with
   * level + 1.  If any INTERVAL entry was printed, a closing line with the
   * summed time, the summed count and their quotient follows.
   */
  static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
                                    int level)
  {
      unsigned long total_interval_time = 0;
      unsigned long total_interval_count = 0;

      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];

          if (profile->root != root || profile->disabled)
              continue;

          switch (profile->type) {
              case OCCURENCE: {
                  unsigned int count =
                      atomic_read(&profile->val.occurence.count);
                  if (count) {
                      print_profile_indent(hdl, level);
                      __SYS_FPRINTF(hdl, "- %s: %u times\n", profile->name, count);
                  }
                  break;
              }
              case INTERVAL: {
                  unsigned int count =
                      atomic_read(&profile->val.interval.count);
                  if (count) {
                      unsigned long time =
                          atomic_read(&profile->val.interval.time);
                      unsigned long ind_time = time / count;
                      total_interval_time += time;
                      total_interval_count += count;
                      print_profile_indent(hdl, level);
                      __SYS_FPRINTF(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
                                    time, profile->name, count, ind_time);
                  }
                  break;
              }
              case CATEGORY:
                  print_profile_indent(hdl, level);
                  __SYS_FPRINTF(hdl, "- %s:\n", profile->name);
                  print_profile_result(hdl, profile, level + 1);
                  break;
              default:
                  break;
          }
      }

      if (total_interval_count) {
          /* Both totals are unsigned long, so every conversion needs the 'l'
             length modifier. */
          __SYS_FPRINTF(hdl, " - (%11.11lu) total: %lu times, %lu msec\n",
                        total_interval_time, total_interval_count,
                        total_interval_time / total_interval_count);
      }
  }
  #endif /* PROFILE */
  930. static struct atomic_int in_terminate = { .counter = 0, };
  /*
   * Terminate the whole process: log the request, run shim_clean(err) and then
   * call DkProcessExit(err).
   *
   * err - value passed to both shim_clean() and DkProcessExit()
   */
  noreturn void shim_terminate(int err)
  {
      debug("teminating the whole process (%d)\n", err);

      /* do last clean-up of the process; its return value is not used */
      (void)shim_clean(err);

      /* reached only when shim_clean() came back to us */
      DkProcessExit(err);
  }
  938. /* cleanup and terminate process, preserve exit code if err == 0 */
  939. int shim_clean (int err)
  940. {
  941. /* preventing multiple cleanup, this is mostly caused by
  942. assertion in shim_clean */
  943. atomic_inc(&in_terminate);
  944. if (atomic_read(&in_terminate) > 1)
  945. return 0;
  946. if (err != 0)
  947. cur_process.exit_code = err;
  948. store_all_msg_persist();
  949. #ifdef PROFILE
  950. if (ENTER_TIME) {
  951. switch (shim_get_tls()->context.syscall_nr) {
  952. case __NR_exit_group:
  953. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  954. break;
  955. case __NR_exit:
  956. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  957. break;
  958. }
  959. }
  960. if (ipc_cld_profile_send()) {
  961. MASTER_LOCK();
  962. PAL_HANDLE hdl = __open_shim_stdio();
  963. if (hdl) {
  964. __SYS_FPRINTF(hdl, "******************************\n");
  965. __SYS_FPRINTF(hdl, "profiling:\n");
  966. print_profile_result(hdl, &profile_root, 0);
  967. __SYS_FPRINTF(hdl, "******************************\n");
  968. }
  969. MASTER_UNLOCK();
  970. DkObjectClose(hdl);
  971. }
  972. #endif
  973. del_all_ipc_ports(0);
  974. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  975. DkObjectClose(shim_stdio);
  976. shim_stdio = NULL;
  977. debug("process %u exited with status %d\n", cur_process.vmid & 0xFFFF, cur_process.exit_code);
  978. MASTER_LOCK();
  979. DkProcessExit(cur_process.exit_code);
  980. return 0;
  981. }
  982. int message_confirm (const char * message, const char * options)
  983. {
  984. char answer;
  985. int noptions = strlen(options);
  986. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  987. int ret = 0;
  988. *(str++) = ' ';
  989. *(str++) = '[';
  990. for (int i = 0 ; i < noptions ; i++) {
  991. *(str++) = options[i];
  992. *(str++) = '/';
  993. }
  994. str--;
  995. *(str++) = ']';
  996. *(str++) = ' ';
  997. MASTER_LOCK();
  998. PAL_HANDLE hdl = __open_shim_stdio();
  999. if (!hdl) {
  1000. MASTER_UNLOCK();
  1001. return -EACCES;
  1002. }
  1003. #define WRITE(buf, len) \
  1004. ({ int _ret = DkStreamWrite(hdl, 0, len, (void*)(buf), NULL); \
  1005. _ret ? : -PAL_ERRNO; })
  1006. #define READ(buf, len) \
  1007. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  1008. _ret ? : -PAL_ERRNO; })
  1009. if ((ret = WRITE(message, strlen(message))) < 0)
  1010. goto out;
  1011. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  1012. goto out;
  1013. if ((ret = READ(&answer, 1)) < 0)
  1014. goto out;
  1015. out:
  1016. DkObjectClose(hdl);
  1017. MASTER_UNLOCK();
  1018. return (ret < 0) ? ret : answer;
  1019. }