shim_init.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*!
  14. * \file shim_init.c
  15. *
  16. * This file contains entry and exit functions of library OS.
  17. */
  18. #include <shim_internal.h>
  19. #include <shim_table.h>
  20. #include <shim_tls.h>
  21. #include <shim_thread.h>
  22. #include <shim_handle.h>
  23. #include <shim_vma.h>
  24. #include <shim_checkpoint.h>
  25. #include <shim_fs.h>
  26. #include <shim_ipc.h>
  27. #include <shim_profile.h>
  28. #include <shim_vdso.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
  /* Allocation-granularity parameters. shim_init() sets allocsize from the
   * PAL control block (alloc_align), allocshift to allocsize - 1 and
   * allocmask to the bitwise complement of allocshift. */
  unsigned long allocsize;
  unsigned long allocshift;
  unsigned long allocmask;
  38. /* The following constants will help matching glibc version with compatible
  39. SHIM libraries */
  40. #include "glibc-version.h"
  41. const unsigned int glibc_version = GLIBC_VERSION;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. __UNUSED(event);
  45. __UNUSED(context);
  46. shim_get_tls()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  47. }
  48. noreturn void __abort(void) {
  49. PAUSE();
  50. shim_terminate(-ENOTRECOVERABLE);
  51. }
  52. void warn (const char *format, ...)
  53. {
  54. va_list args;
  55. va_start (args, format);
  56. __SYS_VPRINTF(format, args);
  57. va_end (args);
  58. }
  /* Stack-protector failure hook. Intentionally a no-op here.
   * NOTE(review): compilers normally assume this hook does not return;
   * confirm whether the empty body is deliberate (e.g. only present to
   * satisfy the linker). */
  void __stack_chk_fail (void)
  {
  }
  62. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  63. /* reserved */ 0,
  64. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  65. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  66. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  67. /* PAL_ERROR_INVAL */ EINVAL,
  68. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  69. /* PAL_ERROR_DENIED */ EACCES,
  70. /* PAL_ERROR_BADHANDLE */ EFAULT,
  71. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  72. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  73. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  74. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  75. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  76. /* PAL_ERROR_INTERRUPTED */ EINTR,
  77. /* PAL_ERROR_OVERFLOW */ EFAULT,
  78. /* PAL_ERROR_BADADDR */ EFAULT,
  79. /* PAL_ERROR_NOMEM */ ENOMEM,
  80. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  81. /* PAL_ERROR_INCONSIST */ EFAULT,
  82. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  83. /* PAL_ERROR_ENDOFSTREAM */ 0,
  84. /* PAL_ERROR_NOTSERVER */ EINVAL,
  85. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  86. /* PAL_ERROR_ZEROSIZE */ 0,
  87. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  88. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  89. };
  90. long convert_pal_errno (long err)
  91. {
  92. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  93. pal_errno_to_unix_errno[err] : 0;
  94. }
  /*!
   * \brief Parse a number into an unsigned long.
   *
   * \param str  A NUL-terminated string containing a non-negative number.
   *
   * \return The parsed value; 0 if no digit could be parsed.
   *
   * By default the number is decimal. If it starts with "0x" or "0X" it is
   * parsed as hexadecimal, and if it otherwise starts with "0" it is parsed
   * as octal. Parsing stops at the first character that is not a valid digit
   * for the radix. If that character is 'K'/'k', 'M'/'m' or 'G'/'g', the
   * result is multiplied by 2^10, 2^20 or 2^30 respectively. Overflow is not
   * detected.
   */
  unsigned long parse_int (const char * str)
  {
      unsigned long num = 0;
      unsigned long radix = 10;
      char c;

      if (str[0] == '0') {
          str++;
          radix = 8;
          /* accept both spellings of the hex prefix, matching the fact that
           * both upper- and lowercase hex digits are accepted below */
          if (str[0] == 'x' || str[0] == 'X') {
              str++;
              radix = 16;
          }
      }

      while ((c = *(str++))) {
          unsigned long val;

          if (c >= 'A' && c <= 'F')
              val = c - 'A' + 10;
          else if (c >= 'a' && c <= 'f')
              val = c - 'a' + 10;
          else if (c >= '0' && c <= '9')
              val = c - '0';
          else
              break;

          if (val >= radix)
              break;

          num = num * radix + val;
      }

      /* c now holds the character that ended the digit run (or NUL) */
      if (c == 'G' || c == 'g')
          num *= 1024UL * 1024 * 1024;
      else if (c == 'M' || c == 'm')
          num *= 1024UL * 1024;
      else if (c == 'K' || c == 'k')
          num *= 1024UL;

      return num;
  }
  139. long int glibc_option (const char * opt)
  140. {
  141. char cfg[CONFIG_MAX];
  142. if (strcmp_static(opt, "heap_size")) {
  143. ssize_t ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
  144. if (ret <= 0) {
  145. debug("no glibc option: %s (err=%ld)\n", opt, ret);
  146. return -ENOENT;
  147. }
  148. long int heap_size = parse_int(cfg);
  149. debug("glibc option: heap_size = %ld\n", heap_size);
  150. return (long int) heap_size;
  151. }
  152. return -EINVAL;
  153. }
  154. void * migrated_memory_start;
  155. void * migrated_memory_end;
  156. const char ** initial_envp __attribute_migratable;
  157. /* library_paths is populated with LD_PRELOAD entries once during LibOS
  158. * initialization and is used in __load_interp_object() to search for ELF
  159. * program interpreter in specific paths. Once allocated, its memory is
  160. * never freed or updated. */
  161. char ** library_paths = NULL;
  162. struct shim_lock __master_lock;
  163. bool lock_enabled;
  164. void init_tcb (shim_tcb_t * tcb)
  165. {
  166. tcb->canary = SHIM_TLS_CANARY;
  167. tcb->self = tcb;
  168. }
  169. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  170. {
  171. memset(new_tcb, 0, sizeof(shim_tcb_t));
  172. new_tcb->canary = SHIM_TLS_CANARY;
  173. new_tcb->self = new_tcb;
  174. new_tcb->tp = old_tcb->tp;
  175. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  176. new_tcb->tid = old_tcb->tid;
  177. new_tcb->debug_buf = old_tcb->debug_buf;
  178. }
  179. /* This function is used to allocate tls before interpreter start running */
  180. void allocate_tls (__libc_tcb_t * tcb, bool user, struct shim_thread * thread)
  181. {
  182. assert(tcb);
  183. tcb->tcb = tcb;
  184. init_tcb(&tcb->shim_tcb);
  185. if (thread) {
  186. thread->tcb = tcb;
  187. thread->user_tcb = user;
  188. tcb->shim_tcb.tp = thread;
  189. tcb->shim_tcb.tid = thread->tid;
  190. } else {
  191. tcb->shim_tcb.tp = NULL;
  192. tcb->shim_tcb.tid = 0;
  193. }
  194. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  195. assert(shim_tls_check_canary());
  196. }
  197. void populate_tls (__libc_tcb_t * tcb, bool user)
  198. {
  199. assert(tcb);
  200. tcb->tcb = tcb;
  201. copy_tcb(&tcb->shim_tcb, shim_get_tls());
  202. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  203. if (thread) {
  204. thread->tcb = tcb;
  205. thread->user_tcb = user;
  206. }
  207. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  208. assert(shim_tls_check_canary());
  209. }
  210. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  211. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  212. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  213. void * allocate_stack (size_t size, size_t protect_size, bool user)
  214. {
  215. size = ALIGN_UP(size);
  216. protect_size = ALIGN_UP(protect_size);
  217. /* preserve a non-readable, non-writable page below the user
  218. stack to stop user program to clobber other vmas */
  219. void * stack = NULL;
  220. int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);
  221. if (user) {
  222. stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
  223. flags, NULL, 0, "stack");
  224. if (!stack)
  225. return NULL;
  226. stack = (void *)
  227. DkVirtualMemoryAlloc(stack, size + protect_size,
  228. 0, PAL_PROT_NONE);
  229. } else {
  230. stack = system_malloc(size + protect_size);
  231. }
  232. if (!stack)
  233. return NULL;
  234. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  235. INC_PROFILE_OCCURENCE(alloc_stack_count);
  236. stack += protect_size;
  237. // Ensure proper alignment for process' initial stack pointer value.
  238. stack += (16 - (uintptr_t)stack % 16) % 16;
  239. DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
  240. if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
  241. return NULL;
  242. debug("allocated stack at %p (size = %ld)\n", stack, size);
  243. return stack;
  244. }
  245. static int populate_user_stack (void * stack, size_t stack_size,
  246. elf_auxv_t ** auxpp, int ** argcpp,
  247. const char *** argvp, const char *** envpp)
  248. {
  249. const int argc = **argcpp;
  250. const char ** argv = *argvp, ** envp = *envpp;
  251. const char ** new_argv = NULL, ** new_envp = NULL;
  252. elf_auxv_t *new_auxp = NULL;
  253. void * stack_bottom = stack;
  254. void * stack_top = stack + stack_size;
  255. #define ALLOCATE_TOP(size) \
  256. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  257. stack_top; })
  258. #define ALLOCATE_BOTTOM(size) \
  259. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  260. stack_bottom - (size); })
  261. /* ld.so expects argc as long on stack, not int. */
  262. long * argcp = ALLOCATE_BOTTOM(sizeof(long));
  263. *argcp = **argcpp;
  264. if (!argv) {
  265. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  266. goto copy_envp;
  267. }
  268. new_argv = stack_bottom;
  269. while (argv) {
  270. for (const char ** a = argv ; *a ; a++) {
  271. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  272. int len = strlen(*a) + 1;
  273. char * abuf = ALLOCATE_TOP(len);
  274. memcpy(abuf, *a, len);
  275. *t = abuf;
  276. }
  277. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  278. copy_envp:
  279. if (!envp)
  280. break;
  281. new_envp = stack_bottom;
  282. argv = envp;
  283. envp = NULL;
  284. }
  285. if (!new_envp)
  286. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  287. /* reserve space for ELF aux vectors, populated later by LibOS */
  288. new_auxp = ALLOCATE_BOTTOM(REQUIRED_ELF_AUXV * sizeof(elf_auxv_t) +
  289. REQUIRED_ELF_AUXV_SPACE);
  290. /* x86_64 ABI requires 16 bytes alignment on stack on every function
  291. call. */
  292. size_t move_size = stack_bottom - stack;
  293. *argcpp = stack_top - move_size;
  294. *argcpp = ALIGN_DOWN_PTR(*argcpp, 16UL);
  295. **argcpp = argc;
  296. size_t shift = (void*)(*argcpp) - stack;
  297. memmove(*argcpp, stack, move_size);
  298. *argvp = new_argv ? (void *) new_argv + shift : NULL;
  299. *envpp = new_envp ? (void *) new_envp + shift : NULL;
  300. *auxpp = new_auxp ? (void *) new_auxp + shift : NULL;
  301. /* clear working area at the bottom */
  302. memset(stack, 0, shift);
  303. return 0;
  304. }
  305. int init_stack (const char ** argv, const char ** envp,
  306. int ** argcpp, const char *** argpp,
  307. elf_auxv_t ** auxpp, size_t reserve)
  308. {
  309. uint64_t stack_size = get_rlimit_cur(RLIMIT_STACK);
  310. if (root_config) {
  311. char stack_cfg[CONFIG_MAX];
  312. if (get_config(root_config, "sys.stack.size", stack_cfg, CONFIG_MAX) > 0) {
  313. stack_size = ALIGN_UP(parse_int(stack_cfg));
  314. set_rlimit_cur(RLIMIT_STACK, stack_size);
  315. }
  316. }
  317. struct shim_thread * cur_thread = get_cur_thread();
  318. if (!cur_thread || cur_thread->stack)
  319. return 0;
  320. void * stack = allocate_stack(stack_size, allocsize, true);
  321. if (!stack)
  322. return -ENOMEM;
  323. if (initial_envp)
  324. envp = initial_envp;
  325. int ret = populate_user_stack(stack, stack_size - reserve,
  326. auxpp, argcpp, &argv, &envp);
  327. if (ret < 0)
  328. return ret;
  329. *argpp = argv;
  330. initial_envp = envp;
  331. cur_thread->stack_top = stack + stack_size;
  332. cur_thread->stack = stack;
  333. cur_thread->stack_red = stack - allocsize;
  334. return 0;
  335. }
  336. int read_environs (const char ** envp)
  337. {
  338. for (const char ** e = envp ; *e ; e++) {
  339. if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
  340. /* populate library_paths with entries from LD_LIBRARY_PATH envvar */
  341. const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
  342. size_t npaths = 2; // One for the first entry, one for the last
  343. // NULL.
  344. for (const char * tmp = s ; *tmp ; tmp++)
  345. if (*tmp == ':')
  346. npaths++;
  347. char** paths = malloc(sizeof(const char *) *
  348. npaths);
  349. if (!paths)
  350. return -ENOMEM;
  351. size_t cnt = 0;
  352. while (*s) {
  353. const char * next;
  354. for (next = s ; *next && *next != ':' ; next++);
  355. size_t len = next - s;
  356. char * str = malloc(len + 1);
  357. if (!str) {
  358. for (size_t i = 0; i < cnt; i++)
  359. free(paths[i]);
  360. free(paths);
  361. return -ENOMEM;
  362. }
  363. memcpy(str, s, len);
  364. str[len] = 0;
  365. paths[cnt++] = str;
  366. s = *next ? next + 1 : next;
  367. }
  368. paths[cnt] = NULL;
  369. assert(!library_paths);
  370. library_paths = paths;
  371. return 0;
  372. }
  373. }
  374. return 0;
  375. }
  376. struct config_store * root_config = NULL;
  /* Function wrapper around malloc, installed as the config store's
   * allocator callback in init_manifest(). */
  static void * __malloc (size_t size)
  {
      void * mem = malloc(size);
      return mem;
  }
  /* Function wrapper around free, installed as the config store's
   * deallocation callback in init_manifest(). */
  static void __free (void * mem)
  {
      free(mem);
      return;
  }
  385. int init_manifest (PAL_HANDLE manifest_handle)
  386. {
  387. int ret = 0;
  388. void * addr = NULL;
  389. size_t size = 0, map_size = 0;
  390. #define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
  391. if (PAL_CB(manifest_preload.start)) {
  392. addr = PAL_CB(manifest_preload.start);
  393. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  394. } else {
  395. PAL_STREAM_ATTR attr;
  396. if (!DkStreamAttributesQueryByHandle(manifest_handle, &attr))
  397. return -PAL_ERRNO;
  398. size = attr.pending_size;
  399. map_size = ALIGN_UP(size);
  400. addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
  401. NULL, 0, "manifest");
  402. if (!addr)
  403. return -ENOMEM;
  404. void * ret_addr = DkStreamMap(manifest_handle, addr,
  405. PAL_PROT_READ, 0,
  406. ALIGN_UP(size));
  407. if (!ret_addr) {
  408. bkeep_munmap(addr, map_size, MAP_FLAGS);
  409. return -ENOMEM;
  410. } else {
  411. assert(addr == ret_addr);
  412. }
  413. }
  414. struct config_store * new_root_config = malloc(sizeof(struct config_store));
  415. if (!new_root_config) {
  416. ret = -ENOMEM;
  417. goto fail;
  418. }
  419. new_root_config->raw_data = addr;
  420. new_root_config->raw_size = size;
  421. new_root_config->malloc = __malloc;
  422. new_root_config->free = __free;
  423. const char * errstring = "Unexpected error";
  424. if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
  425. SYS_PRINTF("Unable to read manifest file: %s\n", errstring);
  426. goto fail;
  427. }
  428. root_config = new_root_config;
  429. return 0;
  430. fail:
  431. if (map_size) {
  432. DkStreamUnmap(addr, map_size);
  433. if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
  434. BUG();
  435. }
  436. free(new_root_config);
  437. return ret;
  438. }
  #ifdef PROFILE
  /* Root profile entry; only defined in PROFILE builds. */
  struct shim_profile profile_root;
  #endif
  /* Locate argv, envp and the auxiliary vector inside the argument area
   * `cookie`: argv starts at the cookie and holds argc + 1 pointers, envp
   * follows and runs up to its NULL terminator, and auxp starts right
   * after that terminator. */
  # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)        \
      do {                                                            \
          void *_pos = (cookie);                                      \
          (argv) = _pos;                                              \
          _pos += sizeof(char *) * ((argc) + 1);                      \
          (envp) = _pos;                                              \
          while (*(char **) _pos)                                     \
              _pos += sizeof(char *);                                 \
          (auxp) = _pos + sizeof(char *);                             \
      } while (0)
  #ifdef PROFILE
  /* Return true iff the NUL-terminated `name` equals the `len`-byte token at
   * `str`. Never reads past the terminator of `name`. */
  static bool profile_name_matches (const char * name, const char * str,
                                    size_t len)
  {
      for (size_t i = 0 ; i < len ; i++)
          if (!name[i] || name[i] != str[i])
              return false;

      return !name[len];
  }

  /* Enable the profiles selected by the PROFILE_ENABLED environment
   * variable (a comma-separated list of profile names).
   *
   * If the variable is absent, nothing is changed. Otherwise all profiles
   * are disabled first, then:
   *   1. every profile named in the list is enabled;
   *   2. profiles whose root is enabled are enabled, repeating for as long
   *      as a newly enabled profile is a CATEGORY;
   *   3. for every enabled non-CATEGORY profile, its still-disabled
   *      ancestors are enabled as well. */
  static void set_profile_enabled (const char ** envp)
  {
      const char ** p;
      for (p = envp ; (*p) ; p++)
          if (strpartcmp_static(*p, "PROFILE_ENABLED="))
              break;
      if (!(*p))
          return;

      for (size_t i = 0 ; i < N_PROFILE ; i++)
          PROFILES[i].disabled = true;

      const char * str = (*p) + static_strlen("PROFILE_ENABLED=");
      bool enabled = false;

      /* step 1: enable the profiles that are listed by name */
      while (*str) {
          const char * next = str;
          for ( ; (*next) && (*next) != ',' ; next++);

          if (next > str) {
              size_t len = next - str;
              for (size_t i = 0 ; i < N_PROFILE ; i++) {
                  struct shim_profile * profile = &PROFILES[i];
                  /* bounded comparison: the token may be longer than the
                   * profile name, so memcmp() over `len` bytes could read
                   * past the end of the name */
                  if (profile_name_matches(profile->name, str, len)) {
                      profile->disabled = false;
                      if (profile->type == CATEGORY)
                          enabled = true;
                  }
              }
          }

          str = (*next) ? next + 1 : next;
      }

      /* step 2: propagate downwards from enabled roots */
      while (enabled) {
          enabled = false;
          for (size_t i = 0 ; i < N_PROFILE ; i++) {
              struct shim_profile * profile = &PROFILES[i];
              if (!profile->disabled || profile->root == &profile_)
                  continue;
              if (!profile->root->disabled) {
                  profile->disabled = false;
                  if (profile->type == CATEGORY)
                      enabled = true;
              }
          }
      }

      /* step 3: propagate upwards from enabled non-category profiles */
      for (size_t i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];
          if (profile->type == CATEGORY || profile->disabled)
              continue;
          for (profile = profile->root ;
               profile != &profile_ && profile->disabled ;
               profile = profile->root)
              profile->disabled = false;
      }
  }
  #endif
  504. static int init_newproc (struct newproc_header * hdr)
  505. {
  506. BEGIN_PROFILE_INTERVAL();
  507. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  508. sizeof(struct newproc_header), hdr,
  509. NULL, 0);
  510. if (!bytes)
  511. return -PAL_ERRNO;
  512. SAVE_PROFILE_INTERVAL(child_wait_header);
  513. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  514. return hdr->failure;
  515. }
  516. DEFINE_PROFILE_CATEGORY(pal, );
  517. DEFINE_PROFILE_INTERVAL(pal_startup_time, pal);
  518. DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
  519. DEFINE_PROFILE_INTERVAL(pal_relocation_time, pal);
  520. DEFINE_PROFILE_INTERVAL(pal_linking_time, pal);
  521. DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time, pal);
  522. DEFINE_PROFILE_INTERVAL(pal_allocation_time, pal);
  523. DEFINE_PROFILE_INTERVAL(pal_tail_startup_time, pal);
  524. DEFINE_PROFILE_INTERVAL(pal_child_creation_time, pal);
  525. DEFINE_PROFILE_CATEGORY(init, );
  526. DEFINE_PROFILE_INTERVAL(init_vma, init);
  527. DEFINE_PROFILE_INTERVAL(init_slab, init);
  528. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  529. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  530. DEFINE_PROFILE_INTERVAL(init_rlimit, init);
  531. DEFINE_PROFILE_INTERVAL(init_fs, init);
  532. DEFINE_PROFILE_INTERVAL(init_dcache, init);
  533. DEFINE_PROFILE_INTERVAL(init_handle, init);
  534. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  535. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  536. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  537. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  538. DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file, init);
  539. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  540. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  541. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  542. DEFINE_PROFILE_INTERVAL(init_thread, init);
  543. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  544. DEFINE_PROFILE_INTERVAL(init_mount, init);
  545. DEFINE_PROFILE_INTERVAL(init_async, init);
  546. DEFINE_PROFILE_INTERVAL(init_stack, init);
  547. DEFINE_PROFILE_INTERVAL(read_environs, init);
  548. DEFINE_PROFILE_INTERVAL(init_loader, init);
  549. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  550. DEFINE_PROFILE_INTERVAL(init_signal, init);
  /* CALL_INIT: plain call of an initialization function with its
   * arguments. */
  #define CALL_INIT(func, args ...)   func(args)

  /* RUN_INIT: run one initialization step. If the step returns a negative
   * value, print a message naming the step and terminate the library OS
   * with that value; otherwise save the profile interval named after the
   * step. */
  #define RUN_INIT(func, ...)                                             \
      do {                                                                \
          int _rc = CALL_INIT(func, ##__VA_ARGS__);                       \
          if (_rc < 0) {                                                  \
              SYS_PRINTF("shim_init() in " #func " (%d)\n", _rc);         \
              shim_terminate(_rc);                                        \
          }                                                               \
          SAVE_PROFILE_INTERVAL(func);                                    \
      } while (0)
  561. extern PAL_HANDLE thread_start_event;
  562. noreturn void* shim_init (int argc, void * args)
  563. {
  564. debug_handle = PAL_CB(debug_stream);
  565. cur_process.vmid = (IDTYPE) PAL_CB(process_id);
  566. /* create the initial TCB, shim can not be run without a tcb */
  567. __libc_tcb_t tcb;
  568. memset(&tcb, 0, sizeof(__libc_tcb_t));
  569. allocate_tls(&tcb, false, NULL);
  570. __disable_preempt(&tcb.shim_tcb); // Temporarily disable preemption for delaying any signal
  571. // that arrives during initialization
  572. debug_setbuf(&tcb.shim_tcb, true);
  573. debug("set tcb to %p\n", &tcb);
  574. #ifdef PROFILE
  575. unsigned long begin_time = GET_PROFILE_INTERVAL();
  576. #endif
  577. debug("host: %s\n", PAL_CB(host_type));
  578. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE);
  579. allocsize = PAL_CB(alloc_align);
  580. allocshift = allocsize - 1;
  581. allocmask = ~allocshift;
  582. create_lock(&__master_lock);
  583. int * argcp = &argc;
  584. const char ** argv, ** envp, ** argp = NULL;
  585. elf_auxv_t * auxp;
  586. /* call to figure out where the arguments are */
  587. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  588. #ifdef PROFILE
  589. set_profile_enabled(envp);
  590. #endif
  591. struct newproc_header hdr;
  592. void * cpaddr = NULL;
  593. #ifdef PROFILE
  594. unsigned long begin_create_time = 0;
  595. #endif
  596. BEGIN_PROFILE_INTERVAL();
  597. RUN_INIT(init_vma);
  598. RUN_INIT(init_slab);
  599. RUN_INIT(read_environs, envp);
  600. RUN_INIT(init_str_mgr);
  601. RUN_INIT(init_internal_map);
  602. RUN_INIT(init_rlimit);
  603. RUN_INIT(init_fs);
  604. RUN_INIT(init_dcache);
  605. RUN_INIT(init_handle);
  606. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  607. if (argc && argv[0][0] == '-') {
  608. if (strcmp_static(argv[0], "-resume") && argc >= 2) {
  609. const char * filename = *(argv + 1);
  610. argc -= 2;
  611. argv += 2;
  612. RUN_INIT(init_mount_root);
  613. RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
  614. &cpaddr);
  615. }
  616. }
  617. if (!cpaddr && PAL_CB(parent_process)) {
  618. RUN_INIT(init_newproc, &hdr);
  619. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  620. hdr.create_time, begin_time);
  621. #ifdef PROFILE
  622. begin_create_time = hdr.begin_create_time;
  623. #endif
  624. if (hdr.checkpoint.hdr.size)
  625. RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
  626. }
  627. if (cpaddr) {
  628. thread_start_event = DkNotificationEventCreate(PAL_FALSE);
  629. RUN_INIT(restore_checkpoint,
  630. &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
  631. (ptr_t) cpaddr, 0);
  632. }
  633. if (PAL_CB(manifest_handle))
  634. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  635. RUN_INIT(init_mount_root);
  636. RUN_INIT(init_ipc);
  637. RUN_INIT(init_thread);
  638. RUN_INIT(init_mount);
  639. RUN_INIT(init_important_handles);
  640. RUN_INIT(init_async);
  641. RUN_INIT(init_stack, argv, envp, &argcp, &argp, &auxp, 0);
  642. RUN_INIT(init_loader);
  643. RUN_INIT(init_ipc_helper);
  644. RUN_INIT(init_signal);
  645. if (PAL_CB(parent_process)) {
  646. /* Notify the parent process */
  647. struct newproc_response res;
  648. res.child_vmid = cur_process.vmid;
  649. res.failure = 0;
  650. if (!DkStreamWrite(PAL_CB(parent_process), 0,
  651. sizeof(struct newproc_response),
  652. &res, NULL))
  653. shim_do_exit(-PAL_ERRNO);
  654. }
  655. debug("shim process initialized\n");
  656. #ifdef PROFILE
  657. if (begin_create_time)
  658. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  659. begin_create_time);
  660. #endif
  661. SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
  662. SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
  663. pal_control.host_specific_startup_time);
  664. SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
  665. pal_control.relocation_time);
  666. SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
  667. SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
  668. pal_control.manifest_loading_time);
  669. SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
  670. pal_control.allocation_time);
  671. SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
  672. pal_control.tail_startup_time);
  673. SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
  674. pal_control.child_creation_time);
  675. if (thread_start_event)
  676. DkEventSet(thread_start_event);
  677. shim_tcb_t * cur_tcb = shim_get_tls();
  678. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  679. if (cur_tcb->context.regs && cur_tcb->context.regs->rsp) {
  680. vdso_map_migrate();
  681. restore_context(&cur_tcb->context);
  682. }
  683. if (cur_thread->exec)
  684. execute_elf_object(cur_thread->exec, argcp, argp, auxp);
  685. shim_do_exit(0);
  686. }
  /* Generic "generate a name until creation succeeds" loop.
   *
   * mkname  writes a candidate name into `name` (capacity `size`) and
   *         returns its length, or a negative error.
   * create  tries to create the object; returns 0 on success, a positive
   *         value to request another candidate, or a negative error.
   * output  optional; if set, its return value is the result.
   * qstr    optional; when `output` is NULL the name is stored here.
   *
   * Returns a negative error from mkname/create, the result of `output`,
   * or the length returned by mkname. */
  static int create_unique (int (*mkname) (char *, size_t, void *),
                            int (*create) (const char *, void *),
                            int (*output) (char *, size_t, const void *,
                                           struct shim_qstr *),
                            char * name, size_t size, void * id, void * obj,
                            struct shim_qstr * qstr)
  {
      for (;;) {
          int len = mkname(name, size, id);
          if (len < 0)
              return len;

          int ret = create(name, obj);
          if (ret < 0)
              return ret;
          if (ret > 0)
              continue;       /* candidate rejected: try another name */

          if (output)
              return output(name, size, id, qstr);

          if (qstr)
              qstrsetstr(qstr, name, len);

          return len;
      }
  }
  710. static int name_pipe_rand (char * uri, size_t size, void * id)
  711. {
  712. IDTYPE pipeid;
  713. size_t len;
  714. int ret = DkRandomBitsRead(&pipeid, sizeof(pipeid));
  715. if (ret < 0)
  716. return -convert_pal_errno(-ret);
  717. debug("creating pipe: pipe.srv:%u\n", pipeid);
  718. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) >= size)
  719. return -ERANGE;
  720. *((IDTYPE *)id) = pipeid;
  721. return len;
  722. }
  723. static int name_pipe_vmid (char * uri, size_t size, void * id)
  724. {
  725. IDTYPE pipeid = cur_process.vmid;
  726. size_t len;
  727. debug("creating pipe: pipe.srv:%u\n", pipeid);
  728. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) >= size)
  729. return -ERANGE;
  730. *((IDTYPE *)id) = pipeid;
  731. return len;
  732. }
  733. static int open_pipe (const char * uri, void * obj)
  734. {
  735. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  736. if (!pipe)
  737. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  738. -PAL_ERRNO;
  739. if (obj)
  740. *((PAL_HANDLE *) obj) = pipe;
  741. else
  742. DkObjectClose(pipe);
  743. return 0;
  744. }
  745. static int pipe_addr (char * uri, size_t size, const void * id,
  746. struct shim_qstr * qstr)
  747. {
  748. IDTYPE pipeid = *((IDTYPE *) id);
  749. size_t len;
  750. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  751. return -ERANGE;
  752. if (qstr)
  753. qstrsetstr(qstr, uri, len);
  754. return len;
  755. }
  756. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  757. struct shim_qstr * qstr, bool use_vmid_for_name)
  758. {
  759. IDTYPE pipeid;
  760. int ret;
  761. if (use_vmid_for_name)
  762. ret = create_unique(&name_pipe_vmid, &open_pipe, &pipe_addr,
  763. uri, size, &pipeid, hdl, qstr);
  764. else
  765. ret = create_unique(&name_pipe_rand, &open_pipe, &pipe_addr,
  766. uri, size, &pipeid, hdl, qstr);
  767. if (ret > 0 && id)
  768. *id = pipeid;
  769. return ret;
  770. }
  /*
   * Append a random 8-hex-digit suffix to the string already stored in path.
   *
   * path, size: buffer holding the NUL-terminated prefix, and its capacity.
   * id:         receives the random suffix (written as unsigned int).
   *
   * Returns the total length of the resulting path, -ERANGE if prefix plus
   * suffix do not fit in the buffer, or the converted PAL error if random
   * bits could not be read.
   *
   * NOTE(review): the prefix is whatever string is currently in path, so a
   * second call on the same buffer treats the earlier suffix as part of the
   * prefix — confirm this is what the caller expects on retries.
   */
  static int name_path (char * path, size_t size, void * id)
  {
      unsigned int suffix;
      size_t prefix_len = strlen(path);
      size_t room;
      size_t len;

      /* no space left for even the terminating NUL of a suffix */
      if (prefix_len >= size)
          return -ERANGE;
      room = size - prefix_len;

      int ret = DkRandomBitsRead(&suffix, sizeof(suffix));
      if (ret < 0)
          return -convert_pal_errno(-ret);

      /* truncation must be judged against the room after the prefix,
       * not against the size of the whole buffer */
      len = snprintf(path + prefix_len, room, "%08x", suffix);
      if (len >= room)
          return -ERANGE;

      *((unsigned int *) id) = suffix;
      return prefix_len + len;
  }
  785. static int open_dir (const char * path, void * obj)
  786. {
  787. struct shim_handle * dir = NULL;
  788. if (obj) {
  789. dir = get_new_handle();
  790. if (!dir)
  791. return -ENOMEM;
  792. }
  793. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  794. NULL);
  795. if (ret < 0)
  796. return ret = -EEXIST ? 1 : ret;
  797. if (obj)
  798. *((struct shim_handle **) obj) = dir;
  799. return 0;
  800. }
  /*
   * Try to exclusively create the regular file at path (read/write, mode 0600).
   *
   * obj: when non-NULL it is treated as a struct shim_handle ** and receives
   *      a newly allocated handle for the file; when NULL the file is created
   *      without a handle.
   *
   * Returns 0 on success, 1 if the path already exists (-EEXIST from
   * open_namei()), -ENOMEM if no handle could be allocated, or the negative
   * error returned by open_namei().
   */
  static int open_file (const char * path, void * obj)
  {
      struct shim_handle * file = NULL;

      if (obj) {
          file = get_new_handle();
          if (!file)
              return -ENOMEM;
      }

      int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
                           NULL);
      if (ret < 0) {
          /* Compare, do not assign: only an existing name maps to 1, every
           * other failure is propagated unchanged.
           * NOTE(review): the handle from get_new_handle() is not released on
           * this path — confirm whether it needs to be dropped here. */
          return ret == -EEXIST ? 1 : ret;
      }

      if (obj)
          *((struct shim_handle **) obj) = file;

      return 0;
  }
  817. static int open_pal_handle (const char * uri, void * obj)
  818. {
  819. PAL_HANDLE hdl;
  820. if (strpartcmp_static(uri, "dev:"))
  821. hdl = DkStreamOpen(uri, 0,
  822. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  823. PAL_SHARE_OWNER_R,
  824. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  825. 0);
  826. else
  827. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  828. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  829. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  830. 0);
  831. if (!hdl) {
  832. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  833. return 0;
  834. else
  835. return -PAL_ERRNO;
  836. }
  837. if (obj) {
  838. *((PAL_HANDLE *) obj) = hdl;
  839. } else {
  840. DkObjectClose(hdl);
  841. }
  842. return 0;
  843. }
  /*
   * Report the path that is already stored in the buffer.
   *
   * path: NUL-terminated path.
   * size: unused.
   * id:   unused.
   * qstr: optional; when non-NULL it is set to the path.
   *
   * Returns the length of path.
   */
  static int output_path (char * path, size_t size, const void * id,
                          struct shim_qstr * qstr)
  {
      /* these parameters exist for API compatibility only */
      __UNUSED(size);
      __UNUSED(id);

      size_t path_len = strlen(path);

      if (qstr != NULL)
          qstrsetstr(qstr, path, path_len);

      return path_len;
  }
  855. int create_dir (const char * prefix, char * path, size_t size,
  856. struct shim_handle ** hdl)
  857. {
  858. unsigned int suffix;
  859. if (prefix) {
  860. size_t len = strlen(prefix);
  861. if (len >= size)
  862. return -ERANGE;
  863. memcpy(path, prefix, len + 1);
  864. }
  865. return create_unique(&name_path, &open_dir, &output_path, path, size,
  866. &suffix, hdl, NULL);
  867. }
  868. int create_file (const char * prefix, char * path, size_t size,
  869. struct shim_handle ** hdl)
  870. {
  871. unsigned int suffix;
  872. if (prefix) {
  873. size_t len = strlen(prefix);
  874. if (len >= size)
  875. return -ERANGE;
  876. memcpy(path, prefix, len + 1);
  877. }
  878. return create_unique(&name_path, &open_file, &output_path, path, size,
  879. &suffix, hdl, NULL);
  880. }
  881. int create_handle (const char * prefix, char * uri, size_t size,
  882. PAL_HANDLE * hdl, unsigned int * id)
  883. {
  884. unsigned int suffix;
  885. if (prefix) {
  886. size_t len = strlen(prefix);
  887. if (len >= size)
  888. return -ERANGE;
  889. memcpy(uri, prefix, len + 1);
  890. }
  891. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  892. id ? : &suffix, hdl, NULL);
  893. }
  894. void check_stack_hook (void)
  895. {
  896. struct shim_thread * cur_thread = get_cur_thread();
  897. void * rsp;
  898. __asm__ volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  899. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  900. if ((uintptr_t) rsp - (uintptr_t) cur_thread->stack < PAL_CB(pagesize))
  901. SYS_PRINTF("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  902. rsp, cur_thread->stack, cur_thread->stack_top);
  903. } else {
  904. SYS_PRINTF("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  905. rsp, cur_thread->stack, cur_thread->stack_top);
  906. }
  907. }
  #ifdef PROFILE
  /* Write one " " per nesting level to hdl. */
  static void print_profile_indent (PAL_HANDLE hdl, int level)
  {
      for (int depth = 0 ; depth < level ; depth++)
          __SYS_FPRINTF(hdl, " ");
  }

  /*
   * Print every enabled entry of PROFILES whose parent is root, recursing
   * into CATEGORY entries with level + 1.
   *
   * hdl:   handle the report is written to.
   * root:  parent entry whose direct children are printed.
   * level: nesting depth, used for indentation.
   *
   * OCCURENCE and INTERVAL entries with a zero count are skipped. After the
   * loop a "total" line sums up the INTERVAL entries printed at this level,
   * if there were any.
   */
  static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
                                    int level)
  {
      unsigned long sum_time = 0;
      unsigned long sum_count = 0;

      for (size_t idx = 0 ; idx < N_PROFILE ; idx++) {
          struct shim_profile * entry = &PROFILES[idx];

          if (entry->disabled || entry->root != root)
              continue;

          if (entry->type == OCCURENCE) {
              unsigned int hits = atomic_read(&entry->val.occurence.count);

              if (hits) {
                  print_profile_indent(hdl, level);
                  __SYS_FPRINTF(hdl, "- %s: %u times\n", entry->name, hits);
              }
          } else if (entry->type == INTERVAL) {
              unsigned int hits = atomic_read(&entry->val.interval.count);

              if (hits) {
                  unsigned long elapsed =
                      atomic_read(&entry->val.interval.time);
                  /* average per recorded interval */
                  unsigned long average = elapsed / hits;

                  sum_time  += elapsed;
                  sum_count += hits;

                  print_profile_indent(hdl, level);
                  __SYS_FPRINTF(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
                                elapsed, entry->name, hits, average);
              }
          } else if (entry->type == CATEGORY) {
              print_profile_indent(hdl, level);
              __SYS_FPRINTF(hdl, "- %s:\n", entry->name);
              /* children of this category are printed one level deeper */
              print_profile_result(hdl, entry, level + 1);
          }
      }

      if (sum_count) {
          __SYS_FPRINTF(hdl, " - (%11.11lu) total: %lu times, %lu msec\n",
                        sum_time, sum_count,
                        sum_time / sum_count);
      }
  }
  #endif /* PROFILE */
  960. static struct atomic_int in_terminate = { .counter = 0, };
  961. noreturn void shim_terminate (int err)
  962. {
  963. debug("teminating the whole process (%d)\n", err);
  964. /* do last clean-up of the process */
  965. shim_clean(err);
  966. DkProcessExit(err);
  967. }
  968. /* cleanup and terminate process, preserve exit code if err == 0 */
  969. int shim_clean (int err)
  970. {
  971. /* preventing multiple cleanup, this is mostly caused by
  972. assertion in shim_clean */
  973. if (atomic_inc_return(&in_terminate) > 1)
  974. return 0;
  975. if (err != 0)
  976. cur_process.exit_code = err;
  977. store_all_msg_persist();
  978. #ifdef PROFILE
  979. if (ENTER_TIME) {
  980. switch (shim_get_tls()->context.orig_rax) {
  981. case __NR_exit_group:
  982. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  983. break;
  984. case __NR_exit:
  985. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  986. break;
  987. }
  988. }
  989. if (ipc_cld_profile_send()) {
  990. MASTER_LOCK();
  991. PAL_HANDLE hdl = __open_shim_stdio();
  992. if (hdl) {
  993. __SYS_FPRINTF(hdl, "******************************\n");
  994. __SYS_FPRINTF(hdl, "profiling:\n");
  995. print_profile_result(hdl, &profile_root, 0);
  996. __SYS_FPRINTF(hdl, "******************************\n");
  997. }
  998. MASTER_UNLOCK();
  999. DkObjectClose(hdl);
  1000. }
  1001. #endif
  1002. del_all_ipc_ports();
  1003. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  1004. DkObjectClose(shim_stdio);
  1005. shim_stdio = NULL;
  1006. debug("process %u exited with status %d\n", cur_process.vmid & 0xFFFF, cur_process.exit_code);
  1007. MASTER_LOCK();
  1008. DkProcessExit(cur_process.exit_code);
  1009. return 0;
  1010. }
  1011. int message_confirm (const char * message, const char * options)
  1012. {
  1013. char answer;
  1014. int noptions = strlen(options);
  1015. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  1016. int ret = 0;
  1017. *(str++) = ' ';
  1018. *(str++) = '[';
  1019. for (int i = 0 ; i < noptions ; i++) {
  1020. *(str++) = options[i];
  1021. *(str++) = '/';
  1022. }
  1023. str--;
  1024. *(str++) = ']';
  1025. *(str++) = ' ';
  1026. MASTER_LOCK();
  1027. PAL_HANDLE hdl = __open_shim_stdio();
  1028. if (!hdl) {
  1029. MASTER_UNLOCK();
  1030. return -EACCES;
  1031. }
  1032. #define WRITE(buf, len) \
  1033. ({ int _ret = DkStreamWrite(hdl, 0, len, (void*)(buf), NULL); \
  1034. _ret ? : -PAL_ERRNO; })
  1035. #define READ(buf, len) \
  1036. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  1037. _ret ? : -PAL_ERRNO; })
  1038. if ((ret = WRITE(message, strlen(message))) < 0)
  1039. goto out;
  1040. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  1041. goto out;
  1042. if ((ret = READ(&answer, 1)) < 0)
  1043. goto out;
  1044. out:
  1045. DkObjectClose(hdl);
  1046. MASTER_UNLOCK();
  1047. return (ret < 0) ? ret : answer;
  1048. }