shim_init.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_tls.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
  /* Allocation alignment; shim_init() sets it from PAL_CB(alloc_align). */
  unsigned long allocsize;
  /* Set to (allocsize - 1) in shim_init().  Despite the name this holds a
     value, not a shift count. */
  unsigned long allocshift;
  /* Set to ~allocshift in shim_init(). */
  unsigned long allocmask;
  38. /* The following constants will help matching glibc version with compatible
  39. SHIM libraries */
  40. #include "glibc-version.h"
  41. const unsigned int glibc_vers_2_17 = GLIBC_VERSION_2_17;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. SHIM_GET_TLS()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  45. }
  /*
   * Assertion-failure hook.
   *
   * Prints the file, line and text of the failed assertion, then calls
   * pause() and finally shim_terminate().  `function` is accepted but not
   * printed.
   */
  void __assert_fail (const char * assertion, const char * file,
                      unsigned int line, const char * function)
  {
      __sys_printf("assert failed %s:%d %s\n", file, line, assertion);
      /* NOTE(review): presumably pauses so a debugger can inspect the failed
         state before termination -- confirm */
      pause();
      shim_terminate();
  }
  /*
   * Stack-protector failure hook.  Intentionally empty here: a reported
   * canary mismatch is ignored and execution continues.
   * NOTE(review): presumably a no-op because the shim re-points the FS
   * segment (see allocate_tls()/populate_tls()), which would invalidate a
   * compiler-inserted canary -- confirm before changing.
   */
  void __stack_chk_fail (void)
  {
  }
  /*
   * Translation table from PAL error codes to Unix errno values, indexed by
   * the PAL code (see convert_pal_errno()).  Entries are positional; the
   * comment on each row names the PAL code that row is meant to serve.
   * A value of 0 means "no Unix error".
   */
  static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
          /* reserved                 */  0,
          /* PAL_ERROR_NOTIMPLEMENTED */  ENOSYS,
          /* PAL_ERROR_NOTDEFINED     */  ENOSYS,
          /* PAL_ERROR_NOTSUPPORT     */  EACCES,
          /* PAL_ERROR_INVAL          */  EINVAL,
          /* PAL_ERROR_TOOLONG        */  ENAMETOOLONG,
          /* PAL_ERROR_DENIED         */  EACCES,
          /* PAL_ERROR_BADHANDLE      */  EFAULT,
          /* PAL_ERROR_STREAMEXIST    */  EEXIST,
          /* PAL_ERROR_STREAMNOTEXIST */  ENOENT,
          /* PAL_ERROR_STREAMISFILE   */  ENOTDIR,
          /* PAL_ERROR_STREAMISDIR    */  EISDIR,
          /* PAL_ERROR_STREAMISDEVICE */  ESPIPE,
          /* PAL_ERROR_INTERRUPTED    */  EINTR,
          /* PAL_ERROR_OVERFLOW       */  EFAULT,
          /* PAL_ERROR_BADADDR        */  EFAULT,
          /* PAL_ERROR_NOMEM          */  ENOMEM,
          /* PAL_ERROR_NOTKILLABLE    */  EACCES,
          /* PAL_ERROR_INCONSIST      */  EFAULT,
          /* PAL_ERROR_TRYAGAIN       */  EAGAIN,
          /* PAL_ERROR_ENDOFSTREAM    */  0,
          /* PAL_ERROR_NOTSERVER      */  EINVAL,
          /* PAL_ERROR_NOTCONNECTION  */  ENOTCONN,
          /* PAL_ERROR_ZEROSIZE       */  0,
          /* PAL_ERROR_CONNFAILED     */  ECONNRESET,
          /* PAL_ERROR_ADDRNOTEXIST   */  EADDRNOTAVAIL,
      };
  84. long convert_pal_errno (long err)
  85. {
  86. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  87. pal_errno_to_unix_errno[err] : 0;
  88. }
  /*
   * Parse an unsigned number with an optional size suffix.
   *
   * Radix is chosen from the prefix: "0x" or "0X" selects hexadecimal, a
   * leading "0" selects octal, anything else is decimal.  Parsing stops at
   * the first character that is not a digit of the selected radix.  If that
   * character is 'K'/'k', 'M'/'m' or 'G'/'g', the value is multiplied by
   * 2^10, 2^20 or 2^30 respectively.
   *
   * Returns the parsed value (0 if no digits were found).  Overflow is not
   * detected; the value wraps as unsigned arithmetic does.
   */
  unsigned long parse_int (const char * str)
  {
      unsigned long num = 0;
      unsigned int radix = 10;
      char c;

      if (str[0] == '0') {
          str++;
          radix = 8;
          /* accept both spellings of the hex prefix */
          if (str[0] == 'x' || str[0] == 'X') {
              str++;
              radix = 16;
          }
      }

      while ((c = *(str++))) {
          unsigned int val;

          if (c >= 'A' && c <= 'F')
              val = c - 'A' + 10;
          else if (c >= 'a' && c <= 'f')
              val = c - 'a' + 10;
          else if (c >= '0' && c <= '9')
              val = c - '0';
          else
              break;

          /* a hex letter in a decimal/octal number ends the digits too */
          if (val >= radix)
              break;

          num = num * radix + val;
      }

      /* `c` is the character that ended the digit run (or NUL) */
      if (c == 'G' || c == 'g')
          num *= 1UL << 30;
      else if (c == 'M' || c == 'm')
          num *= 1UL << 20;
      else if (c == 'K' || c == 'k')
          num *= 1UL << 10;

      return num;
  }
  124. long int glibc_option (const char * opt)
  125. {
  126. char cfg[CONFIG_MAX];
  127. if (!memcmp(opt, "heap_size", 9)) {
  128. int ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
  129. if (ret < 0) {
  130. debug("no glibc option: %s (err=%d)\n", opt, ret);
  131. return -ENOENT;
  132. }
  133. long int heap_size = parse_int(cfg);
  134. debug("glibc option: heap_size = %ld\n", heap_size);
  135. return (long int) heap_size;
  136. }
  137. return -EINVAL;
  138. }
  /* NOTE(review): the three migrated_* pointers are not referenced in this
     part of the file; presumably they describe memory received from a
     checkpoint/migration -- confirm against the checkpoint code. */
  void * migrated_memory_start;
  void * migrated_memory_end;
  void * migrated_shim_addr;
  /* Set by shim_init() to the address just past the aux vector and its
     trailing zero word; handed back to the caller through *return_stack. */
  void * initial_stack;
  /* Environment vector; when non-NULL init_stack() uses it instead of the
     envp it was given, and updates it to the copy placed on the new stack. */
  const char ** initial_envp __attribute_migratable;
  /* NULL-terminated list built by read_environs() from LD_LIBRARY_PATH. */
  const char ** library_paths;
  /* Created in shim_init() via create_lock(). */
  LOCKTYPE __master_lock;
  /* Not written in this part of the file. */
  bool lock_enabled;
  /* Set to true by read_environs() when the environment has IN_GDB=1. */
  bool in_gdb;
  148. void init_tcb (shim_tcb_t * tcb)
  149. {
  150. tcb->canary = SHIM_TLS_CANARY;
  151. tcb->self = tcb;
  152. }
  153. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  154. {
  155. memset(new_tcb, 0, sizeof(shim_tcb_t));
  156. new_tcb->canary = SHIM_TLS_CANARY;
  157. new_tcb->self = new_tcb;
  158. new_tcb->tp = old_tcb->tp;
  159. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  160. new_tcb->tid = old_tcb->tid;
  161. new_tcb->debug_buf = old_tcb->debug_buf;
  162. }
  163. /* This function is used to allocate tls before interpreter start running */
  164. void allocate_tls (void * tcb_location, bool user, struct shim_thread * thread)
  165. {
  166. __libc_tcb_t * tcb = tcb_location;
  167. assert(tcb);
  168. tcb->tcb = tcb;
  169. init_tcb(&tcb->shim_tcb);
  170. if (thread) {
  171. thread->tcb = tcb;
  172. thread->user_tcb = user;
  173. tcb->shim_tcb.tp = thread;
  174. tcb->shim_tcb.tid = thread->tid;
  175. } else {
  176. tcb->shim_tcb.tp = NULL;
  177. tcb->shim_tcb.tid = 0;
  178. }
  179. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  180. assert(SHIM_TLS_CHECK_CANARY());
  181. }
  182. void populate_tls (void * tcb_location, bool user)
  183. {
  184. __libc_tcb_t * tcb = (__libc_tcb_t *) tcb_location;
  185. assert(tcb);
  186. tcb->tcb = tcb;
  187. copy_tcb(&tcb->shim_tcb, SHIM_GET_TLS());
  188. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  189. if (thread) {
  190. thread->tcb = tcb;
  191. thread->user_tcb = user;
  192. }
  193. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  194. assert(SHIM_TLS_CHECK_CANARY());
  195. }
  196. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  197. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  198. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  199. void * allocate_stack (size_t size, size_t protect_size, bool user)
  200. {
  201. size = ALIGN_UP(size);
  202. protect_size = ALIGN_UP(protect_size);
  203. /* preserve a non-readable, non-writeable page below the user
  204. stack to stop user program to clobber other vmas */
  205. void * stack = user ?
  206. get_unmapped_vma(size + protect_size, STACK_FLAGS) :
  207. NULL;
  208. if (user)
  209. stack = (void *) DkVirtualMemoryAlloc(stack, size + protect_size,
  210. 0, PAL_PROT_READ|PAL_PROT_WRITE);
  211. else
  212. stack = system_malloc(size + protect_size);
  213. if (!stack)
  214. return NULL;
  215. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  216. INC_PROFILE_OCCURENCE(alloc_stack_count);
  217. if (protect_size &&
  218. !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
  219. return NULL;
  220. stack += protect_size;
  221. if (user) {
  222. if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
  223. STACK_FLAGS, NULL, 0, "[stack]") < 0)
  224. return NULL;
  225. if (protect_size &&
  226. bkeep_mmap(stack - protect_size, protect_size, 0,
  227. STACK_FLAGS, NULL, 0, NULL) < 0)
  228. return NULL;
  229. }
  230. debug("allocated stack at %p (size = %d)\n", stack, size);
  231. return stack;
  232. }
  233. int populate_user_stack (void * stack, size_t stack_size,
  234. int nauxv, elf_auxv_t ** auxpp,
  235. const char *** argvp, const char *** envpp)
  236. {
  237. const char ** argv = *argvp, ** envp = *envpp;
  238. const char ** new_argv = NULL, ** new_envp = NULL;
  239. void * stack_bottom = stack;
  240. void * stack_top = stack + stack_size;
  241. #define ALLOCATE_TOP(size) \
  242. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  243. stack_top; })
  244. #define ALLOCATE_BOTTOM(size) \
  245. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  246. stack_bottom - (size); })
  247. if (!argv) {
  248. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  249. goto copy_envp;
  250. }
  251. new_argv = stack_bottom;
  252. while (argv) {
  253. for (const char ** a = argv ; *a ; a++) {
  254. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  255. int len = strlen(*a) + 1;
  256. char * abuf = ALLOCATE_TOP(len);
  257. memcpy(abuf, *a, len);
  258. *t = abuf;
  259. }
  260. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  261. copy_envp:
  262. if (!envp)
  263. break;
  264. new_envp = stack_bottom;
  265. argv = envp;
  266. envp = NULL;
  267. }
  268. if (!new_envp)
  269. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  270. stack_bottom = (void *) ((unsigned long) stack_bottom & ~7UL);
  271. *((unsigned long *) ALLOCATE_TOP(sizeof(unsigned long))) = 0;
  272. if (nauxv) {
  273. elf_auxv_t * old_auxp = *auxpp;
  274. *auxpp = ALLOCATE_TOP(sizeof(elf_auxv_t) * nauxv);
  275. if (old_auxp)
  276. memcpy(*auxpp, old_auxp, nauxv * sizeof(elf_auxv_t));
  277. }
  278. memmove(stack_top - (stack_bottom - stack), stack, stack_bottom - stack);
  279. if (new_argv)
  280. *argvp = (void *) new_argv + (stack_top - stack_bottom);
  281. if (new_envp)
  282. *envpp = (void *) new_envp + (stack_top - stack_bottom);
  283. return 0;
  284. }
  285. unsigned long sys_stack_size = 0;
  286. int init_stack (const char ** argv, const char ** envp, const char *** argpp,
  287. int nauxv, elf_auxv_t ** auxpp)
  288. {
  289. if (!sys_stack_size) {
  290. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  291. if (root_config) {
  292. char stack_cfg[CONFIG_MAX];
  293. if (get_config(root_config, "sys.stack.size", stack_cfg,
  294. CONFIG_MAX) > 0)
  295. sys_stack_size = ALIGN_UP(parse_int(stack_cfg));
  296. }
  297. }
  298. struct shim_thread * cur_thread = get_cur_thread();
  299. if (!cur_thread || cur_thread->stack)
  300. return 0;
  301. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  302. if (!stack)
  303. return -ENOMEM;
  304. if (initial_envp)
  305. envp = initial_envp;
  306. int ret = populate_user_stack(stack, sys_stack_size,
  307. nauxv, auxpp, &argv, &envp);
  308. if (ret < 0)
  309. return ret;
  310. *argpp = argv;
  311. initial_envp = envp;
  312. cur_thread->stack_top = stack + sys_stack_size;
  313. cur_thread->stack = stack;
  314. cur_thread->stack_red = stack - allocsize;
  315. return 0;
  316. }
  317. int read_environs (const char ** envp)
  318. {
  319. for (const char ** e = envp ; *e ; e++) {
  320. switch ((*e)[0]) {
  321. case 'L': {
  322. if (!memcmp(*e, "LD_LIBRARY_PATH=", 16)) {
  323. int npaths = 0;
  324. for (const char * s = (*e) + 16 ; *s ; s++)
  325. if (*s == ':')
  326. npaths++;
  327. const char ** paths = malloc(sizeof(const char *) *
  328. (npaths + 1));
  329. if (!paths)
  330. return -ENOMEM;
  331. const char * s = (*e) + 16, * next;
  332. int cnt = 0;
  333. while (*s) {
  334. for (next = s ; *next && *next != ':' ; next++);
  335. int len = next - s;
  336. char * str = malloc(len + 1);
  337. if (!str)
  338. return -ENOMEM;
  339. memcpy(str, s, len);
  340. str[len] = 0;
  341. paths[cnt++] = str;
  342. s = *next ? next + 1 : next;
  343. }
  344. paths[cnt] = NULL;
  345. library_paths = paths;
  346. break;
  347. }
  348. break;
  349. }
  350. case 'I': {
  351. if (!memcmp(*e, "IN_GDB=1", 8)) {
  352. in_gdb = true;
  353. break;
  354. }
  355. break;
  356. }
  357. }
  358. }
  359. return 0;
  360. }
  /* Parsed manifest; allocated and filled by init_manifest(), NULL until
     then and again if reading the manifest fails. */
  struct config_store * root_config = NULL;

  /* Allocation callback installed in root_config->malloc by
     init_manifest(); forwards to malloc(). */
  static void * __malloc (int size)
  {
      return malloc(size);
  }

  /* Release callback installed in root_config->free by init_manifest();
     forwards to free(). */
  static void __free (void * mem)
  {
      free(mem);
  }
  370. int init_manifest (PAL_HANDLE manifest_handle)
  371. {
  372. void * addr;
  373. unsigned int size;
  374. if (PAL_CB(manifest_preload.start)) {
  375. addr = PAL_CB(manifest_preload.start);
  376. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  377. } else {
  378. PAL_STREAM_ATTR attr;
  379. if (!DkStreamAttributesQuerybyHandle(manifest_handle, &attr))
  380. return -PAL_ERRNO;
  381. size = attr.pending_size;
  382. addr = (void *) DkStreamMap(manifest_handle, NULL,
  383. PAL_PROT_READ, 0,
  384. ALIGN_UP(size));
  385. if (!addr)
  386. return -PAL_ERRNO;
  387. }
  388. bkeep_mmap(addr, ALIGN_UP(size), PROT_READ,
  389. MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0,
  390. "[manifest]");
  391. root_config = malloc(sizeof(struct config_store));
  392. root_config->raw_data = addr;
  393. root_config->raw_size = size;
  394. root_config->malloc = __malloc;
  395. root_config->free = __free;
  396. const char * errstring = "Unexpected error";
  397. int ret = 0;
  398. if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
  399. root_config = NULL;
  400. sys_printf("Unable to read manifest file: %s\n", errstring);
  401. return ret;
  402. }
  403. return 0;
  404. }
  #ifdef PROFILE
  /* Only defined in PROFILE builds.  NOTE(review): presumably the root node
     of the profile tree -- not referenced in this part of the file. */
  struct shim_profile profile_root;
  #endif
  /*
   * Split the startup block at `cookie` into its three parts:
   *   argv : starts at `cookie` and spans (argc + 1) pointer slots
   *   envp : starts right after, and runs up to the first NULL slot
   *   auxp : starts one pointer slot past that NULL
   * `argv`, `envp` and `auxp` are assigned to; `cookie` and `argc` are only
   * read.  Relies on arithmetic on `void *` (a GNU C extension).
   */
  # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)      \
      do {                                                          \
          void *_tmp = (cookie);                                    \
          (argv) = _tmp;                                            \
          _tmp += sizeof(char *) * ((argc) + 1);                    \
          (envp) = _tmp;                                            \
          for ( ; *(char **) _tmp; _tmp += sizeof(char *));         \
          (auxp) = _tmp + sizeof(char *);                           \
      } while (0)
  417. static void * __process_auxv (elf_auxv_t * auxp)
  418. {
  419. elf_auxv_t * av;
  420. for (av = auxp; av->a_type != AT_NULL; av++)
  421. switch (av->a_type) {
  422. default: break;
  423. }
  424. return av + 1;
  425. }
  /*
   * Assert that the 64-bit word at `stack` is zero, then advance `stack`
   * past it.  `stack` must be an lvalue; it is modified in place.
   */
  #define FIND_LAST_STACK(stack)                          \
      do {                                                \
          /* check if exist a NULL end */                 \
          assert(*(uint64_t *) stack == 0);               \
          stack += sizeof(uint64_t);                      \
      } while (0)
  #ifdef PROFILE
  /*
   * Enable only the profiles named in the PROFILE_ENABLED environment
   * variable (a comma-separated list of profile names).
   *
   * If the variable is absent nothing is changed.  Otherwise every profile is
   * first disabled, the named ones are re-enabled, enabling a category also
   * enables everything beneath it, and finally the ancestors of every enabled
   * non-category profile are enabled as well.
   */
  static void set_profile_enabled (const char ** envp)
  {
      const char ** p;
      /* locate the PROFILE_ENABLED= entry, if any */
      for (p = envp ; (*p) ; p++)
          if (!memcmp(*p, "PROFILE_ENABLED=", 16))
              break;
      if (!(*p))
          return;

      /* start from "everything off" */
      for (int i = 0 ; i < N_PROFILE ; i++)
           PROFILES[i].disabled = true;

      const char * str = (*p) + 16;
      /* set when a category was enabled and its children still need a pass */
      bool enabled = false;
      while (*str) {
          const char * next = str;
          for ( ; (*next) && (*next) != ',' ; next++);
          if (next > str) {
              int len = next - str;
              /* enable every profile whose name is exactly this token */
              for (int i = 0 ; i < N_PROFILE ; i++) {
                  struct shim_profile * profile = &PROFILES[i];
                  if (!memcmp(profile->name, str, len) && !profile->name[len]) {
                      profile->disabled = false;
                      if (profile->type == CATAGORY)
                          enabled = true;
                  }
              }
          }
          /* skip the comma, or stop at the terminating NUL */
          str = (*next) ? next + 1 : next;
      }

      /* Propagate downward: repeat until a pass enables no new category.
         A disabled profile whose root is enabled becomes enabled. */
      while (enabled) {
          enabled = false;
          for (int i = 0 ; i < N_PROFILE ; i++) {
              struct shim_profile * profile = &PROFILES[i];
              if (!profile->disabled || profile->root == &profile_)
                  continue;
              if (!profile->root->disabled) {
                  profile->disabled = false;
                  if (profile->type == CATAGORY)
                      enabled = true;
              }
          }
      }

      /* Propagate upward: for each enabled non-category profile, enable the
         chain of roots above it until an already-enabled one (or the top,
         &profile_) is reached. */
      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];
          if (profile->type == CATAGORY || profile->disabled)
              continue;
          for (profile = profile->root ;
               profile != &profile_ && profile->disabled ;
               profile = profile->root)
              profile->disabled = false;
      }
  }
  #endif
  485. static int init_newproc (struct newproc_header * hdr)
  486. {
  487. BEGIN_PROFILE_INTERVAL();
  488. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  489. sizeof(struct newproc_header), hdr,
  490. NULL, 0);
  491. if (!bytes)
  492. return -PAL_ERRNO;
  493. SAVE_PROFILE_INTERVAL(child_wait_header);
  494. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  495. return hdr->failure;
  496. }
  /* Profile entries under the "pal" category; shim_init() fills them from
     the pal_control.*_time fields once initialisation has finished. */
  DEFINE_PROFILE_CATAGORY(pal, );
  DEFINE_PROFILE_INTERVAL(pal_startup_time,               pal);
  DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
  DEFINE_PROFILE_INTERVAL(pal_relocation_time,            pal);
  DEFINE_PROFILE_INTERVAL(pal_linking_time,               pal);
  DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time,      pal);
  DEFINE_PROFILE_INTERVAL(pal_allocation_time,            pal);
  DEFINE_PROFILE_INTERVAL(pal_tail_startup_time,          pal);
  DEFINE_PROFILE_INTERVAL(pal_child_creation_time,        pal);

  /* Profile entries under the "init" category.  RUN_INIT(func, ...) saves an
     interval named after `func`, so most names here match an init function
     invoked from shim_init(). */
  DEFINE_PROFILE_CATAGORY(init, );
  DEFINE_PROFILE_INTERVAL(init_randgen,               init);
  DEFINE_PROFILE_INTERVAL(init_heap,                  init);
  DEFINE_PROFILE_INTERVAL(init_slab,                  init);
  DEFINE_PROFILE_INTERVAL(init_str_mgr,               init);
  DEFINE_PROFILE_INTERVAL(init_internal_map,          init);
  DEFINE_PROFILE_INTERVAL(init_vma,                   init);
  DEFINE_PROFILE_INTERVAL(init_fs,                    init);
  DEFINE_PROFILE_INTERVAL(init_dcache,                init);
  DEFINE_PROFILE_INTERVAL(init_handle,                init);
  DEFINE_PROFILE_INTERVAL(read_from_checkpoint,       init);
  DEFINE_PROFILE_INTERVAL(read_from_file,             init);
  DEFINE_PROFILE_INTERVAL(init_newproc,               init);
  DEFINE_PROFILE_INTERVAL(init_mount_root,            init);
  DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file,  init);
  DEFINE_PROFILE_INTERVAL(restore_from_file,          init);
  DEFINE_PROFILE_INTERVAL(init_manifest,              init);
  DEFINE_PROFILE_INTERVAL(init_ipc,                   init);
  DEFINE_PROFILE_INTERVAL(init_thread,                init);
  DEFINE_PROFILE_INTERVAL(init_important_handles,     init);
  DEFINE_PROFILE_INTERVAL(init_mount,                 init);
  DEFINE_PROFILE_INTERVAL(init_async,                 init);
  DEFINE_PROFILE_INTERVAL(init_stack,                 init);
  DEFINE_PROFILE_INTERVAL(read_environs,              init);
  DEFINE_PROFILE_INTERVAL(init_loader,                init);
  DEFINE_PROFILE_INTERVAL(init_ipc_helper,            init);
  DEFINE_PROFILE_INTERVAL(init_signal,                init);
  /* Plain call-through: expands to func(args). */
  #define CALL_INIT(func, args ...)   func(args)

  /*
   * Run one initialisation step.  Calls `func` with the remaining arguments;
   * if it returns a negative value, prints the function name and the error
   * and calls shim_terminate().  In every case the profile interval named
   * after `func` is saved afterwards.
   */
  #define RUN_INIT(func, ...)                                             \
      do {                                                                \
          int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
          if (_err < 0) {                                                 \
              sys_printf("shim initialization failed in " #func " (%d)",  \
                         _err);                                           \
              shim_terminate();                                           \
          }                                                               \
          SAVE_PROFILE_INTERVAL(func);                                    \
      } while (0)
  extern PAL_HANDLE thread_start_event;

  /*
   * Entry point of the library OS.
   *
   * argc         : number of command-line arguments
   * args         : start of the startup block (argv, NULL, envp, NULL, auxv)
   * return_stack : receives initial_stack when the function returns
   *
   * Sets up a temporary TCB, runs every subsystem initialiser through
   * RUN_INIT (each of which terminates the shim on failure), optionally
   * restores state from a checkpoint file ("-resume <file>") or from the
   * parent process, and finally either restores a saved context, executes
   * the thread's ELF object, or returns 0.
   */
  int shim_init (int argc, void * args, void ** return_stack)
  {
      debug_handle = PAL_CB(debug_stream);
      cur_process.vmid = (IDTYPE) PAL_CB(process_id);

      /* create the initial TCB, shim can not be run without a tcb */
      __libc_tcb_t tcb;
      memset(&tcb, 0, sizeof(__libc_tcb_t));
      allocate_tls(&tcb, false, NULL);
      debug_setbuf(&tcb.shim_tcb, true);
      debug("set tcb to %p\n", &tcb);

  #ifdef PROFILE
      unsigned long begin_time = GET_PROFILE_INTERVAL();
  #endif

      /* PAL failures are recorded in the TLS pal_errno by handle_failure() */
      DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE, 0);

      allocsize = PAL_CB(alloc_align);
      allocshift = allocsize - 1;
      allocmask = ~allocshift;

      create_lock(__master_lock);

      const char ** argv, ** envp, ** argp = NULL;
      elf_auxv_t * auxp;

      /* call to figure out where the arguments are */
      FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
      /* initial_stack first points just past the AT_NULL entry, which makes
         the pointer difference below the number of auxv entries including
         AT_NULL; FIND_LAST_STACK then steps over the trailing zero word */
      initial_stack = __process_auxv(auxp);
      int nauxv = (elf_auxv_t *) initial_stack - auxp;
      FIND_LAST_STACK(initial_stack);

  #ifdef PROFILE
      set_profile_enabled(envp);
  #endif

      struct newproc_header hdr;
      /* non-NULL once checkpoint data is available for restore_checkpoint() */
      void * cpaddr = NULL;
  #ifdef PROFILE
      unsigned long begin_create_time = 0;
  #endif

      BEGIN_PROFILE_INTERVAL();
      RUN_INIT(init_randgen);
      RUN_INIT(init_heap);
      RUN_INIT(init_slab);
      RUN_INIT(read_environs, envp);
      RUN_INIT(init_str_mgr);
      RUN_INIT(init_internal_map);
      RUN_INIT(init_vma);
      RUN_INIT(init_fs);
      RUN_INIT(init_dcache);
      RUN_INIT(init_handle);

      debug("shim loaded at %p, ready to initialize\n", &__load_address);

      if (argc && argv[0][0] == '-') {
          /* comparing 8 bytes includes the NUL, so this is an exact match */
          if (!memcmp(argv[0], "-resume", 8) && argc >= 2) {
              const char * filename = *(argv + 1);
              /* consume "-resume" and the file name */
              argc -= 2;
              argv += 2;
              /* NOTE(review): init_mount_root also runs again further down
                 on this path -- confirm that a second call is harmless */
              RUN_INIT(init_mount_root);
              RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
                       &cpaddr);
              goto restore;
          }
      }

      if (PAL_CB(parent_process)) {
          /* started by another shim process: receive its header first */
          RUN_INIT(init_newproc, &hdr);
          SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
                                    hdr.create_time, begin_time);
  #ifdef PROFILE
          begin_create_time = hdr.begin_create_time;
  #endif

          if (hdr.checkpoint.hdr.size)
              RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
      }

      if (cpaddr) {
          /* the -resume path above jumps straight into this block */
  restore:
          thread_start_event = DkNotificationEventCreate(0);
          RUN_INIT(restore_checkpoint,
                   &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
                   (ptr_t) cpaddr, 0);
      }

      if (PAL_CB(manifest_handle))
          RUN_INIT(init_manifest, PAL_CB(manifest_handle));

      RUN_INIT(init_mount_root);
      RUN_INIT(init_ipc);
      RUN_INIT(init_thread);
      RUN_INIT(init_mount);
      RUN_INIT(init_important_handles);
      RUN_INIT(init_async);
      /* argp receives the argv copy placed on the new user stack */
      RUN_INIT(init_stack, argv, envp, &argp, nauxv, &auxp);
      RUN_INIT(init_loader);
      RUN_INIT(init_ipc_helper);
      RUN_INIT(init_signal);

      debug("shim process initialized\n");

  #ifdef PROFILE
      if (begin_create_time)
          SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
                                      begin_create_time);
  #endif

      /* publish the timings the PAL measured during its own startup */
      SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
      SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
                                pal_control.host_specific_startup_time);
      SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
                                pal_control.relocation_time);
      SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
      SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
                                pal_control.manifest_loading_time);
      SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
                                pal_control.allocation_time);
      SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
                                pal_control.tail_startup_time);
      SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
                                pal_control.child_creation_time);

      /* the event only exists when a checkpoint was restored above */
      if (thread_start_event)
          DkEventSet(thread_start_event);

      shim_tcb_t * cur_tcb = SHIM_GET_TLS();
      struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;

      /* NOTE(review): presumably a saved stack pointer means a restored
         thread should continue from its saved context -- confirm */
      if (cur_tcb->context.sp)
          restore_context(&cur_tcb->context);

      if (cur_thread->exec)
          execute_elf_object(cur_thread->exec,
                             argc, argp, nauxv, auxp);

      *return_stack = initial_stack;
      return 0;
  }
  /*
   * Generic "pick a random name until creation succeeds" loop.
   *
   * mkname : writes a candidate name into `name` (capacity `size`), storing
   *          its identifier through `id`; returns the length or a negative
   *          error
   * create : tries to create the object; negative = error, positive = name
   *          already taken (try again), zero = created
   * output : optional; when set it produces the final result and return
   *          value, otherwise the name is stored into `qstr` (if given) and
   *          its length is returned
   *
   * Returns a negative error from mkname/create, or the value described under
   * `output`.
   */
  static int create_unique (int (*mkname) (char *, size_t, void *),
                            int (*create) (const char *, void *),
                            int (*output) (char *, size_t, const void *,
                                           struct shim_qstr *),
                            char * name, size_t size, void * id, void * obj,
                            struct shim_qstr * qstr)
  {
      for (;;) {
          int len = mkname(name, size, id);
          if (len < 0)
              return len;

          int ret = create(name, obj);
          if (ret < 0)
              return ret;
          if (ret > 0)
              continue;       /* name collision, pick another one */

          if (output)
              return output(name, size, id, qstr);

          if (qstr)
              qstrsetstr(qstr, name, len);

          return len;
      }
  }
  685. static int name_pipe (char * uri, size_t size, void * id)
  686. {
  687. IDTYPE pipeid;
  688. int len;
  689. if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
  690. return -EACCES;
  691. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  692. return -ERANGE;
  693. *((IDTYPE *) id) = pipeid;
  694. return len;
  695. }
  696. static int open_pipe (const char * uri, void * obj)
  697. {
  698. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  699. if (!pipe)
  700. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  701. -PAL_ERRNO;
  702. if (obj)
  703. *((PAL_HANDLE *) obj) = pipe;
  704. else
  705. DkObjectClose(pipe);
  706. return 0;
  707. }
  708. static int pipe_addr (char * uri, size_t size, const void * id,
  709. struct shim_qstr * qstr)
  710. {
  711. IDTYPE pipeid = *((IDTYPE *) id);
  712. int len;
  713. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  714. return -ERANGE;
  715. if (qstr)
  716. qstrsetstr(qstr, uri, len);
  717. return len;
  718. }
  719. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  720. struct shim_qstr * qstr)
  721. {
  722. IDTYPE pipeid;
  723. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  724. uri, size, &pipeid, hdl, qstr);
  725. if (ret > 0 && id)
  726. *id = pipeid;
  727. return ret;
  728. }
  /*
   * Name generator for create_unique(): appends a random 8-digit hex suffix
   * to the string already present in `path` (capacity `size`).
   *
   * On success the suffix value is stored through `id` (an unsigned int *)
   * and the total path length is returned.  Returns -EACCES if not enough
   * random bytes were obtained, or -ERANGE if the suffix does not fit; in
   * that case `path` is cut back to what it held on entry.
   *
   * NOTE(review): the suffix is appended to whatever `path` currently holds,
   * so a retry from create_unique() extends the previous candidate instead of
   * replacing its suffix.
   */
  static int name_path (char * path, size_t size, void * id)
  {
      unsigned int suffix;
      size_t prefix_len = strlen(path);
      int len;

      /* no room left at all; also keeps `size - prefix_len` from wrapping */
      if (prefix_len >= size)
          return -ERANGE;

      if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
          return -EACCES;

      size_t room = size - prefix_len;
      len = snprintf(path + prefix_len, room, "%08x", suffix);
      /* truncation must be judged against the space that was actually
         available, not the size of the whole buffer */
      if (len < 0 || (size_t) len >= room) {
          path[prefix_len] = '\0';
          return -ERANGE;
      }

      *((unsigned int *) id) = suffix;
      return prefix_len + len;
  }
  742. static int open_dir (const char * path, void * obj)
  743. {
  744. struct shim_handle * dir = NULL;
  745. if (obj) {
  746. dir = get_new_handle();
  747. if (!dir)
  748. return -ENOMEM;
  749. }
  750. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  751. NULL);
  752. if (ret < 0)
  753. return ret = -EEXIST ? 1 : ret;
  754. if (obj)
  755. *((struct shim_handle **) obj) = dir;
  756. return 0;
  757. }
  /*
   * Creation callback for create_unique(): exclusively creates the regular
   * file `path` (mode 0600) opened for reading and writing.
   *
   * Returns 1 if the path already exists (caller should try another name),
   * any other negative error unchanged, and 0 on success.  When `obj` is
   * given (a struct shim_handle **), a new handle is allocated for the file
   * and stored through it on success; -ENOMEM is returned if the handle
   * cannot be allocated.
   */
  static int open_file (const char * path, void * obj)
  {
      struct shim_handle * file = NULL;

      if (obj) {
          file = get_new_handle();
          if (!file)
              return -ENOMEM;
      }

      int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
                           NULL);
      /* Only "already exists" asks for a retry.  The previous `ret = -EEXIST`
         was an assignment of an always-true ternary, so every failure was
         reported as a collision and create_unique() retried indefinitely.
         NOTE(review): `file` is not released on this path. */
      if (ret < 0)
          return ret == -EEXIST ? 1 : ret;

      if (obj)
          *((struct shim_handle **) obj) = file;

      return 0;
  }
  774. static int open_pal_handle (const char * uri, void * obj)
  775. {
  776. PAL_HANDLE hdl;
  777. if (!memcmp(uri, "dir:", 4))
  778. hdl = DkStreamOpen(uri, 0,
  779. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  780. PAL_SHARE_OWNER_R,
  781. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  782. 0);
  783. else
  784. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  785. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  786. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  787. 0);
  788. if (!hdl) {
  789. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  790. return 0;
  791. else
  792. return -PAL_ERRNO;
  793. }
  794. if (obj)
  795. *((PAL_HANDLE *) obj) = hdl;
  796. return 0;
  797. }
  /*
   * Output callback for create_unique() when the result is a path: copies
   * `path` into `qstr` (if given) and returns the path length.  `size` and
   * `id` are unused.
   */
  static int output_path (char * path, size_t size, const void * id,
                          struct shim_qstr * qstr)
  {
      int len = strlen(path);

      if (!qstr)
          return len;

      qstrsetstr(qstr, path, len);
      return len;
  }
  806. int create_dir (const char * prefix, char * path, size_t size,
  807. struct shim_handle ** hdl)
  808. {
  809. unsigned int suffix;
  810. if (prefix) {
  811. int len = strlen(prefix);
  812. if (len >= size)
  813. return -ERANGE;
  814. memcpy(path, prefix, len + 1);
  815. }
  816. return create_unique(&name_path, &open_dir, &output_path, path, size,
  817. &suffix, hdl, NULL);
  818. }
  819. int create_file (const char * prefix, char * path, size_t size,
  820. struct shim_handle ** hdl)
  821. {
  822. unsigned int suffix;
  823. if (prefix) {
  824. int len = strlen(prefix);
  825. if (len >= size)
  826. return -ERANGE;
  827. memcpy(path, prefix, len + 1);
  828. }
  829. return create_unique(&name_path, &open_file, &output_path, path, size,
  830. &suffix, hdl, NULL);
  831. }
  832. int create_handle (const char * prefix, char * uri, size_t size,
  833. PAL_HANDLE * hdl, unsigned int * id)
  834. {
  835. unsigned int suffix;
  836. if (prefix) {
  837. int len = strlen(prefix);
  838. if (len >= size)
  839. return -ERANGE;
  840. memcpy(uri, prefix, len + 1);
  841. }
  842. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  843. id ? : &suffix, hdl, NULL);
  844. }
  845. void check_stack_hook (void)
  846. {
  847. struct shim_thread * cur_thread = get_cur_thread();
  848. void * rsp;
  849. asm volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  850. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  851. if (rsp - cur_thread->stack < PAL_CB(pagesize))
  852. sys_printf("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  853. rsp, cur_thread->stack, cur_thread->stack_top);
  854. } else {
  855. sys_printf("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  856. rsp, cur_thread->stack, cur_thread->stack_top);
  857. }
  858. }
  859. #ifdef PROFILE
  860. static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
  861. int level)
  862. {
  863. unsigned long total_interval_time = 0;
  864. unsigned long total_interval_count = 0;
  865. for (int i = 0 ; i < N_PROFILE ; i++) {
  866. struct shim_profile * profile = &PROFILES[i];
  867. if (profile->root != root || profile->disabled)
  868. continue;
  869. switch (profile->type) {
  870. case OCCURENCE: {
  871. unsigned int count =
  872. atomic_read(&profile->val.occurence.count);
  873. if (count) {
  874. for (int j = 0 ; j < level ; j++)
  875. __sys_fprintf(hdl, " ");
  876. __sys_fprintf(hdl, "- %s: %u times\n", profile->name, count);
  877. }
  878. break;
  879. }
  880. case INTERVAL: {
  881. unsigned int count =
  882. atomic_read(&profile->val.interval.count);
  883. if (count) {
  884. unsigned long time =
  885. atomic_read(&profile->val.interval.time);
  886. unsigned long ind_time = time / count;
  887. total_interval_time += time;
  888. total_interval_count += count;
  889. for (int j = 0 ; j < level ; j++)
  890. __sys_fprintf(hdl, " ");
  891. __sys_fprintf(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
  892. time, profile->name, count, ind_time);
  893. }
  894. break;
  895. }
  896. case CATAGORY:
  897. for (int j = 0 ; j < level ; j++)
  898. __sys_fprintf(hdl, " ");
  899. __sys_fprintf(hdl, "- %s:\n", profile->name);
  900. print_profile_result(hdl, profile, level + 1);
  901. break;
  902. }
  903. }
  904. if (total_interval_count) {
  905. __sys_fprintf(hdl, " - (%11.11u) total: %u times, %lu msec\n",
  906. total_interval_time, total_interval_count,
  907. total_interval_time / total_interval_count);
  908. }
  909. }
  910. #endif /* PROFILE */
  911. static struct shim_atomic in_terminate = { .counter = 0, };
  912. int shim_terminate (void)
  913. {
  914. debug("teminating the whole process\n");
  915. /* do last clean-up of the process */
  916. shim_clean();
  917. DkProcessExit(0);
  918. return 0;
  919. }
  920. int shim_clean (void)
  921. {
  922. /* preventing multiple cleanup, this is mostly caused by
  923. assertion in shim_clean */
  924. atomic_inc(&in_terminate);
  925. if (atomic_read(&in_terminate) > 1)
  926. return 0;
  927. store_all_msg_persist();
  928. #ifdef PROFILE
  929. if (ENTER_TIME) {
  930. switch (SHIM_GET_TLS()->context.syscall_nr) {
  931. case __NR_exit_group:
  932. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  933. break;
  934. case __NR_exit:
  935. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  936. break;
  937. }
  938. }
  939. if (ipc_cld_profile_send()) {
  940. master_lock();
  941. PAL_HANDLE hdl = __open_shim_stdio();
  942. if (hdl) {
  943. __sys_fprintf(hdl, "******************************\n");
  944. __sys_fprintf(hdl, "profiling:\n");
  945. print_profile_result(hdl, &profile_root, 0);
  946. __sys_fprintf(hdl, "******************************\n");
  947. }
  948. master_unlock();
  949. }
  950. #endif
  951. del_all_ipc_ports(0);
  952. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  953. DkObjectClose(shim_stdio);
  954. shim_stdio = NULL;
  955. debug("process %u successfully terminated\n", cur_process.vmid);
  956. master_lock();
  957. DkProcessExit(cur_process.exit_code);
  958. return 0;
  959. }
  960. int message_confirm (const char * message, const char * options)
  961. {
  962. char answer;
  963. int noptions = strlen(options);
  964. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  965. int ret = 0;
  966. *(str++) = ' ';
  967. *(str++) = '[';
  968. for (int i = 0 ; i < noptions ; i++) {
  969. *(str++) = options[i];
  970. *(str++) = '/';
  971. }
  972. str--;
  973. *(str++) = ']';
  974. *(str++) = ' ';
  975. master_lock();
  976. PAL_HANDLE hdl = __open_shim_stdio();
  977. if (!hdl) {
  978. master_unlock();
  979. return -EACCES;
  980. }
  981. #define WRITE(buf, len) \
  982. ({ int _ret = DkStreamWrite(hdl, 0, len, buf, NULL); \
  983. _ret ? : -PAL_ERRNO; })
  984. #define READ(buf, len) \
  985. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  986. _ret ? : -PAL_ERRNO; })
  987. if ((ret = WRITE(message, strlen(message))) < 0)
  988. goto out;
  989. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  990. goto out;
  991. if ((ret = READ(&answer, 1)) < 0)
  992. goto out;
  993. out:
  994. master_unlock();
  995. return (ret < 0) ? ret : answer;
  996. }