shim_init.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_tls.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
  /*
   * Allocation granularity and the two values derived from it.  shim_init()
   * fills them in: allocsize comes from the PAL control block (alloc_align),
   * allocshift is (allocsize - 1) and allocmask is ~allocshift.
   */
  unsigned long allocsize;
  unsigned long allocshift;
  unsigned long allocmask;
  38. /* The following constants will help matching glibc version with compatible
  39. SHIM libraries */
  40. #include "glibc-version.h"
  41. const unsigned int glibc_vers_2_17 = GLIBC_VERSION_2_17;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. SHIM_GET_TLS()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  45. }
  /*
   * Assertion-failure hook: prints the failing expression together with its
   * file and line, calls pause(), and then terminates the library OS through
   * shim_terminate().
   *
   * assertion - text of the expression that failed
   * file      - source file containing the assertion
   * line      - line number of the assertion
   * function  - enclosing function name (currently not printed)
   */
  void __assert_fail(const char *assertion, const char *file,
                     unsigned int line, const char *function)
  {
      /* `line` is unsigned, so it is printed with %u rather than %d */
      __sys_printf("assert failed %s:%u %s\n", file, line, assertion);
      pause();
      shim_terminate();
  }
  /*
   * Stack-protector failure hook.  Intentionally a no-op here: it simply
   * returns to the caller.
   */
  void __stack_chk_fail(void)
  {
      return;
  }
  56. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  57. /* reserved */ 0,
  58. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  59. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  60. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  61. /* PAL_ERROR_INVAL */ EINVAL,
  62. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  63. /* PAL_ERROR_DENIED */ EACCES,
  64. /* PAL_ERROR_BADHANDLE */ EFAULT,
  65. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  66. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  67. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  68. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  69. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  70. /* PAL_ERROR_INTERRUPTED */ EINTR,
  71. /* PAL_ERROR_OVERFLOW */ EFAULT,
  72. /* PAL_ERROR_BADADDR */ EFAULT,
  73. /* PAL_ERROR_NOMEM */ ENOMEM,
  74. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  75. /* PAL_ERROR_INCONSIST */ EFAULT,
  76. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  77. /* PAL_ERROR_ENDOFSTREAM */ 0,
  78. /* PAL_ERROR_NOTSERVER */ EINVAL,
  79. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  80. /* PAL_ERROR_ZEROSIZE */ 0,
  81. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  82. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  83. };
  84. long convert_pal_errno (long err)
  85. {
  86. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  87. pal_errno_to_unix_errno[err] : 0;
  88. }
  89. void * initial_stack = NULL;
  90. const char ** initial_envp __attribute_migratable = NULL;
  91. void * migrated_memory_start = 0;
  92. void * migrated_memory_end = 0;
  93. extern void * migrated_shim_addr;
  94. const char ** library_paths = NULL;
  95. bool in_gdb = false;
  96. LOCKTYPE __master_lock;
  97. bool lock_enabled = false;
  98. void init_tcb (shim_tcb_t * tcb)
  99. {
  100. tcb->canary = SHIM_TLS_CANARY;
  101. tcb->self = tcb;
  102. }
  103. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  104. {
  105. memset(new_tcb, 0, sizeof(shim_tcb_t));
  106. new_tcb->canary = SHIM_TLS_CANARY;
  107. new_tcb->self = new_tcb;
  108. new_tcb->tp = old_tcb->tp;
  109. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  110. new_tcb->tid = old_tcb->tid;
  111. new_tcb->debug_buf = old_tcb->debug_buf;
  112. }
  113. /* This function is used to allocate tls before interpreter start running */
  114. void allocate_tls (void * tcb_location, struct shim_thread * thread)
  115. {
  116. __libc_tcb_t * tcb = tcb_location;
  117. assert(tcb);
  118. tcb->tcb = tcb;
  119. init_tcb(&tcb->shim_tcb);
  120. if (thread) {
  121. thread->tcb = tcb;
  122. tcb->shim_tcb.tp = thread;
  123. tcb->shim_tcb.tid = thread->tid;
  124. }
  125. DkThreadPrivate(tcb);
  126. assert(SHIM_TLS_CHECK_CANARY());
  127. }
  128. void populate_tls (void * tcb_location)
  129. {
  130. __libc_tcb_t * tcb = (__libc_tcb_t *) tcb_location;
  131. assert(tcb);
  132. tcb->tcb = tcb;
  133. copy_tcb(&tcb->shim_tcb, SHIM_GET_TLS());
  134. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  135. if (thread)
  136. thread->tcb = tcb;
  137. DkThreadPrivate(tcb);
  138. assert(SHIM_TLS_CHECK_CANARY());
  139. }
  140. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  141. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  142. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
  143. void * allocate_stack (size_t size, size_t protect_size, bool user)
  144. {
  145. size = ALIGN_UP(size);
  146. protect_size = ALIGN_UP(protect_size);
  147. /* preserve a non-readable, non-writeable page below the user
  148. stack to stop user program to clobber other vmas */
  149. void * stack = user ?
  150. get_unmapped_vma(size + protect_size, STACK_FLAGS) :
  151. NULL;
  152. if (user)
  153. stack = DkVirtualMemoryAlloc(stack, size + protect_size,
  154. 0, PAL_PROT_READ|PAL_PROT_WRITE);
  155. else
  156. stack = system_malloc(size + protect_size);
  157. if (!stack)
  158. return NULL;
  159. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  160. INC_PROFILE_OCCURENCE(alloc_stack_count);
  161. if (protect_size &&
  162. !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
  163. return NULL;
  164. stack += protect_size;
  165. if (user) {
  166. if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
  167. STACK_FLAGS, NULL, 0, "stack") < 0)
  168. return NULL;
  169. if (protect_size &&
  170. bkeep_mmap(stack - protect_size, protect_size, 0,
  171. STACK_FLAGS, NULL, 0, "stack-red") < 0)
  172. return NULL;
  173. }
  174. debug("allocated stack at %p (size = %d)\n", stack, size);
  175. return stack;
  176. }
  177. int populate_user_stack (void * stack, size_t stack_size,
  178. int nauxv, elf_auxv_t ** auxpp,
  179. const char *** argvp, const char *** envpp)
  180. {
  181. const char ** argv = *argvp, ** envp = *envpp;
  182. const char ** new_argv = NULL, ** new_envp = NULL;
  183. void * stack_bottom = stack;
  184. void * stack_top = stack + stack_size;
  185. #define ALLOCATE_TOP(size) \
  186. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  187. stack_top; })
  188. #define ALLOCATE_BOTTOM(size) \
  189. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  190. stack_bottom - (size); })
  191. if (!argv) {
  192. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  193. goto copy_envp;
  194. }
  195. new_argv = stack_bottom;
  196. while (argv) {
  197. for (const char ** a = argv ; *a ; a++) {
  198. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  199. int len = strlen(*a) + 1;
  200. char * abuf = ALLOCATE_TOP(len);
  201. memcpy(abuf, *a, len);
  202. *t = abuf;
  203. }
  204. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  205. copy_envp:
  206. if (!envp)
  207. break;
  208. new_envp = stack_bottom;
  209. argv = envp;
  210. envp = NULL;
  211. }
  212. if (!new_envp)
  213. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  214. stack_bottom = (void *) ((unsigned long) stack_bottom & ~7UL);
  215. *((unsigned long *) ALLOCATE_TOP(sizeof(unsigned long))) = 0;
  216. if (nauxv) {
  217. elf_auxv_t * old_auxp = *auxpp;
  218. *auxpp = ALLOCATE_TOP(sizeof(elf_auxv_t) * nauxv);
  219. if (old_auxp)
  220. memcpy(*auxpp, old_auxp, nauxv * sizeof(elf_auxv_t));
  221. }
  222. memmove(stack_top - (stack_bottom - stack), stack, stack_bottom - stack);
  223. if (new_argv)
  224. *argvp = (void *) new_argv + (stack_top - stack_bottom);
  225. if (new_envp)
  226. *envpp = (void *) new_envp + (stack_top - stack_bottom);
  227. return 0;
  228. }
  229. unsigned long sys_stack_size = 0;
  230. int init_stack (const char ** argv, const char ** envp, const char *** argpp,
  231. int nauxv, elf_auxv_t ** auxpp)
  232. {
  233. if (!sys_stack_size) {
  234. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  235. if (root_config) {
  236. char stack_cfg[CONFIG_MAX];
  237. if (get_config(root_config, "sys.stack.size", stack_cfg,
  238. CONFIG_MAX) > 0)
  239. sys_stack_size = ALIGN_UP(atoi(stack_cfg));
  240. }
  241. }
  242. struct shim_thread * cur_thread = get_cur_thread();
  243. if (!cur_thread || cur_thread->stack)
  244. return 0;
  245. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  246. if (!stack)
  247. return -ENOMEM;
  248. if (initial_envp)
  249. envp = initial_envp;
  250. int ret = populate_user_stack(stack, sys_stack_size,
  251. nauxv, auxpp, &argv, &envp);
  252. if (ret < 0)
  253. return ret;
  254. *argpp = argv;
  255. initial_envp = envp;
  256. cur_thread->stack_top = stack + sys_stack_size;
  257. cur_thread->stack = stack;
  258. cur_thread->stack_red = stack - allocsize;
  259. return 0;
  260. }
  261. int read_environs (const char ** envp)
  262. {
  263. for (const char ** e = envp ; *e ; e++) {
  264. switch ((*e)[0]) {
  265. case 'L': {
  266. if (!memcmp(*e, "LD_LIBRARY_PATH=", 16)) {
  267. int npaths = 0;
  268. for (const char * s = (*e) + 16 ; *s ; s++)
  269. if (*s == ':')
  270. npaths++;
  271. const char ** paths = malloc(sizeof(const char *) *
  272. (npaths + 1));
  273. if (!paths)
  274. return -ENOMEM;
  275. const char * s = (*e) + 16, * next;
  276. int cnt = 0;
  277. while (*s) {
  278. for (next = s ; *next && *next != ':' ; next++);
  279. int len = next - s;
  280. char * str = malloc(len + 1);
  281. if (!str)
  282. return -ENOMEM;
  283. memcpy(str, s, len);
  284. str[len] = 0;
  285. paths[cnt++] = str;
  286. s = *next ? next + 1 : next;
  287. }
  288. paths[cnt] = NULL;
  289. library_paths = paths;
  290. break;
  291. }
  292. break;
  293. }
  294. case 'I': {
  295. if (!memcmp(*e, "IN_GDB=1", 8)) {
  296. in_gdb = true;
  297. break;
  298. }
  299. break;
  300. }
  301. }
  302. }
  303. return 0;
  304. }
  /* Root configuration store; NULL until init_manifest() has loaded one. */
  struct config_store *root_config = NULL;

  /* Allocation callback handed to the configuration store: forwards to
     malloc(). */
  static void *__malloc(int size)
  {
      void *mem = malloc(size);
      return mem;
  }

  /* Release callback handed to the configuration store: forwards to free(). */
  static void __free(void *mem)
  {
      free(mem);
  }
  314. extern bool ask_for_checkpoint;
  315. int init_manifest (PAL_HANDLE manifest_handle)
  316. {
  317. PAL_STREAM_ATTR attr;
  318. if (!DkStreamAttributesQuerybyHandle(manifest_handle, &attr))
  319. return -PAL_ERRNO;
  320. size_t cfg_size = attr.size;
  321. void * cfg_addr = DkStreamMap(manifest_handle, NULL,
  322. PAL_PROT_READ, 0, ALIGN_UP(cfg_size));
  323. if (!cfg_addr)
  324. return -PAL_ERRNO;
  325. root_config = malloc(sizeof(struct config_store));
  326. root_config->raw_data = cfg_addr;
  327. root_config->raw_size = cfg_size;
  328. root_config->malloc = __malloc;
  329. root_config->free = __free;
  330. const char * errstring = "Unexpected error";
  331. int ret = 0;
  332. if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
  333. root_config = NULL;
  334. sys_printf("Unable to read manifest file: %s\n", errstring);
  335. return ret;
  336. }
  337. char cfgbuf[CONFIG_MAX];
  338. if (get_config(root_config, "sys.ask_for_checkpoint", cfgbuf,
  339. CONFIG_MAX) > 0 &&
  340. cfgbuf[0] == '1' && !cfgbuf[1])
  341. ask_for_checkpoint = true;
  342. return 0;
  343. }
  #ifdef PROFILE
  /* Root of the profile tree; print_profile_result() is started from it. */
  struct shim_profile profile_root;
  #endif
  /*
   * Locate the pieces of the startup block that begins at `cookie`:
   *   argv - the block itself, holding (argc) + 1 pointers
   *   envp - the pointers following argv, up to a NULL entry
   *   auxp - whatever follows the NULL entry that ends envp
   */
  #define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)     \
      do {                                                        \
          void *_cursor = (cookie);                               \
          (argv) = _cursor;                                       \
          _cursor += sizeof(char *) * ((argc) + 1);               \
          (envp) = _cursor;                                       \
          while (*(char **) _cursor)                              \
              _cursor += sizeof(char *);                          \
          (auxp) = _cursor + sizeof(char *);                      \
      } while (0)
  356. static void * __process_auxv (elf_auxv_t * auxp)
  357. {
  358. elf_auxv_t * av;
  359. for (av = auxp; av->a_type != AT_NULL; av++)
  360. switch (av->a_type) {
  361. default: break;
  362. }
  363. return av + 1;
  364. }
  /*
   * Step `stack` over one 64-bit word, asserting that the word is zero.
   */
  #define FIND_LAST_STACK(stack)                          \
      do {                                                \
          /* the word being skipped must be a NULL end */ \
          assert(*(uint64_t *) stack == 0);               \
          stack += sizeof(uint64_t);                      \
      } while (0)
  #ifdef PROFILE
  /*
   * Apply the PROFILE_ENABLED environment variable, a ','-separated list of
   * profile names.  Without the variable nothing is changed.  With it:
   *   1. every profile is disabled, then the named ones are re-enabled;
   *   2. profiles whose parent is enabled are enabled as well, repeatedly,
   *      as long as a newly enabled category may have children;
   *   3. the disabled ancestors of every enabled non-category profile are
   *      enabled.
   */
  static void set_profile_enabled(const char **envp)
  {
      const char *list = NULL;

      for (const char **e = envp ; *e ; e++)
          if (!memcmp(*e, "PROFILE_ENABLED=", 16)) {
              list = (*e) + 16;
              break;
          }

      if (!list)
          return;

      for (int i = 0 ; i < N_PROFILE ; i++)
          PROFILES[i].disabled = true;

      /* step 1: enable the profiles that are named explicitly */
      bool propagate = false;
      const char *name = list;

      while (*name) {
          const char *end = name;
          while (*end && *end != ',')
              end++;

          if (end > name) {
              int len = end - name;

              for (int i = 0 ; i < N_PROFILE ; i++) {
                  struct shim_profile *prof = &PROFILES[i];

                  if (!memcmp(prof->name, name, len) && !prof->name[len]) {
                      prof->disabled = false;
                      if (prof->type == CATAGORY)
                          propagate = true;
                  }
              }
          }

          name = *end ? end + 1 : end;
      }

      /* step 2: push the enabled state down to the children */
      while (propagate) {
          propagate = false;

          for (int i = 0 ; i < N_PROFILE ; i++) {
              struct shim_profile *prof = &PROFILES[i];

              if (prof->disabled && prof->root != &profile_ &&
                  !prof->root->disabled) {
                  prof->disabled = false;
                  if (prof->type == CATAGORY)
                      propagate = true;
              }
          }
      }

      /* step 3: pull the enabled state up to the ancestors */
      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile *prof = &PROFILES[i];

          if (prof->type == CATAGORY || prof->disabled)
              continue;

          struct shim_profile *parent = prof->root;
          while (parent != &profile_ && parent->disabled) {
              parent->disabled = false;
              parent = parent->root;
          }
      }
  }
  #endif
  424. DEFINE_PROFILE_CATAGORY(resume, );
  425. DEFINE_PROFILE_INTERVAL(child_created_in_new_process, resume);
  426. DEFINE_PROFILE_INTERVAL(child_receive_header, resume);
  427. DEFINE_PROFILE_INTERVAL(child_total_migration_time, resume);
  428. static int init_newproc (struct newproc_header * hdr)
  429. {
  430. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  431. sizeof(struct newproc_header), hdr,
  432. NULL, 0);
  433. if (!bytes)
  434. return -PAL_ERRNO;
  435. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  436. return hdr->failure;
  437. }
  438. DEFINE_PROFILE_CATAGORY(init, );
  439. DEFINE_PROFILE_INTERVAL(init_signal, init);
  440. DEFINE_PROFILE_INTERVAL(init_heap, init);
  441. DEFINE_PROFILE_INTERVAL(init_slab, init);
  442. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  443. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  444. DEFINE_PROFILE_INTERVAL(init_vma, init);
  445. DEFINE_PROFILE_INTERVAL(init_fs, init);
  446. DEFINE_PROFILE_INTERVAL(init_handle, init);
  447. DEFINE_PROFILE_INTERVAL(init_randgen, init);
  448. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  449. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  450. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  451. DEFINE_PROFILE_INTERVAL(init_checkpoint, init);
  452. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  453. DEFINE_PROFILE_INTERVAL(restore_from_checkpoint, init);
  454. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  455. DEFINE_PROFILE_INTERVAL(restore_from_stack, init);
  456. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  457. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  458. DEFINE_PROFILE_INTERVAL(init_thread, init);
  459. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  460. DEFINE_PROFILE_INTERVAL(init_mount, init);
  461. DEFINE_PROFILE_INTERVAL(init_async, init);
  462. DEFINE_PROFILE_INTERVAL(init_stack, init);
  463. DEFINE_PROFILE_INTERVAL(read_environs, init);
  464. DEFINE_PROFILE_INTERVAL(init_loader, init);
  465. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  /* Call `func` with the given arguments. */
  #define CALL_INIT(func, args ...) func(args)

  /*
   * Run one initialization step.  A negative return value is reported
   * together with the name of the step and ends the process through
   * shim_terminate(); otherwise the profile interval named after the step
   * is saved.
   */
  #define RUN_INIT(func, ...)                                             \
      do {                                                                \
          int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
          if (_err < 0) {                                                 \
              sys_printf("shim initialization failed in " #func " (%e)",  \
                         -_err);                                          \
              shim_terminate();                                           \
          }                                                               \
          SAVE_PROFILE_INTERVAL(func);                                    \
      } while (0)
  477. extern PAL_HANDLE thread_start_event;
  478. int shim_init (int argc, void * args, void ** return_stack)
  479. {
  480. debug_handle = PAL_CB(debug_stream);
  481. /* create the initial TCB, shim can not be run without a tcb */
  482. __libc_tcb_t tcb;
  483. allocate_tls(&tcb, NULL);
  484. debug_setbuf(&tcb.shim_tcb, true);
  485. #ifdef PROFILE
  486. unsigned long begin_time = GET_PROFILE_INTERVAL();
  487. #endif
  488. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE, 0);
  489. allocsize = PAL_CB(alloc_align);
  490. allocshift = allocsize - 1;
  491. allocmask = ~allocshift;
  492. create_lock(__master_lock);
  493. const char ** argv, ** envp, ** argp = NULL;
  494. elf_auxv_t * auxp;
  495. /* call to figure out where the arguments are */
  496. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  497. initial_stack = __process_auxv(auxp);
  498. int nauxv = (elf_auxv_t *) initial_stack - auxp;
  499. FIND_LAST_STACK(initial_stack);
  500. #ifdef PROFILE
  501. set_profile_enabled(envp);
  502. #endif
  503. struct newproc_header hdr;
  504. void * cpaddr = NULL;
  505. #ifdef PROFILE
  506. unsigned long begin_create_time = 0;
  507. #endif
  508. BEGIN_PROFILE_INTERVAL();
  509. RUN_INIT(init_signal);
  510. RUN_INIT(init_heap);
  511. RUN_INIT(init_slab);
  512. RUN_INIT(init_str_mgr);
  513. RUN_INIT(init_internal_map);
  514. RUN_INIT(init_vma);
  515. RUN_INIT(init_fs);
  516. RUN_INIT(init_handle);
  517. RUN_INIT(init_randgen);
  518. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  519. if (argc && argv[0][0] == '-') {
  520. if (!memcmp(argv[0], "-resume", 8) && argc >= 2) {
  521. const char * filename = *(argv + 1);
  522. argc -= 2;
  523. argv += 2;
  524. RUN_INIT(init_mount_root);
  525. RUN_INIT(restore_from_checkpoint, filename, &hdr.checkpoint,
  526. &cpaddr);
  527. goto restore;
  528. }
  529. if (!memcmp(argv[0], "-resume-file", 13) && argc >= 2) {
  530. const char * filename = *(argv + 1);
  531. argc -= 2;
  532. argv += 2;
  533. RUN_INIT(init_mount_root);
  534. RUN_INIT(restore_from_file, filename, &hdr.checkpoint, &cpaddr);
  535. goto restore;
  536. }
  537. }
  538. if (PAL_CB(parent_process)) {
  539. RUN_INIT(init_newproc, &hdr);
  540. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  541. hdr.create_time, begin_time);
  542. #ifdef PROFILE
  543. begin_create_time = hdr.begin_create_time;
  544. #endif
  545. if (hdr.checkpoint.data.cpsize)
  546. RUN_INIT(init_checkpoint, &hdr.checkpoint, &cpaddr);
  547. }
  548. if (cpaddr) {
  549. restore:
  550. thread_start_event = DkNotificationEventCreate(0);
  551. RUN_INIT(restore_from_stack, cpaddr, &hdr.checkpoint.data, 0);
  552. }
  553. if (PAL_CB(manifest_handle))
  554. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  555. RUN_INIT(init_mount_root);
  556. RUN_INIT(init_ipc);
  557. RUN_INIT(init_thread);
  558. RUN_INIT(init_important_handles);
  559. RUN_INIT(init_mount);
  560. RUN_INIT(init_async);
  561. RUN_INIT(init_stack, argv, envp, &argp, nauxv, &auxp);
  562. RUN_INIT(read_environs, envp);
  563. RUN_INIT(init_loader);
  564. RUN_INIT(init_ipc_helper);
  565. debug("shim process initialized\n");
  566. #ifdef PROFILE
  567. if (begin_create_time)
  568. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  569. begin_create_time);
  570. #endif
  571. if (thread_start_event)
  572. DkEventSet(thread_start_event);
  573. shim_tcb_t * cur_tcb = SHIM_GET_TLS();
  574. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  575. if (cur_tcb->context.sp)
  576. restore_context(&cur_tcb->context);
  577. if (cur_thread->exec)
  578. execute_elf_object(cur_thread->exec,
  579. argc, argp, nauxv, auxp);
  580. *return_stack = initial_stack;
  581. return 0;
  582. }
  /*
   * Create an object under a name that is not taken yet.
   *
   * mkname - writes a candidate name into `name` (capacity `size`) and
   *          returns its length, or a negative error
   * create - tries to create the object; returns 0 on success, a positive
   *          value to ask for another name, or a negative error
   * output - optional; when given, its return value becomes the result
   * id     - passed through to mkname and output
   * obj    - passed through to create
   * qstr   - passed to output; without output it receives the name
   *
   * Returns the value of output(), or the name length when there is no
   * output callback, or the first negative error from mkname/create.
   */
  static int create_unique(int (*mkname) (char *, size_t, void *),
                           int (*create) (const char *, void *),
                           int (*output) (char *, size_t, const void *,
                                          struct shim_qstr *),
                           char *name, size_t size, void *id, void *obj,
                           struct shim_qstr *qstr)
  {
      int len, ret;

      /* keep generating names for as long as create asks for a retry */
      do {
          len = mkname(name, size, id);
          if (len < 0)
              return len;

          ret = create(name, obj);
          if (ret < 0)
              return ret;
      } while (ret != 0);

      if (output)
          return output(name, size, id, qstr);

      if (qstr)
          qstrsetstr(qstr, name, len);

      return len;
  }
  606. static int name_pipe (char * uri, size_t size, void * id)
  607. {
  608. IDTYPE pipeid;
  609. int len;
  610. if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
  611. return -EACCES;
  612. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  613. return -ERANGE;
  614. *((IDTYPE *) id) = pipeid;
  615. return len;
  616. }
  617. static int open_pipe (const char * uri, void * obj)
  618. {
  619. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  620. if (!pipe)
  621. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  622. -PAL_ERRNO;
  623. if (obj)
  624. *((PAL_HANDLE *) obj) = pipe;
  625. else
  626. DkObjectClose(pipe);
  627. return 0;
  628. }
  629. static int pipe_addr (char * uri, size_t size, const void * id,
  630. struct shim_qstr * qstr)
  631. {
  632. IDTYPE pipeid = *((IDTYPE *) id);
  633. int len;
  634. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  635. return -ERANGE;
  636. if (qstr)
  637. qstrsetstr(qstr, uri, len);
  638. return len;
  639. }
  640. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  641. struct shim_qstr * qstr)
  642. {
  643. IDTYPE pipeid;
  644. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  645. uri, size, &pipeid, hdl, qstr);
  646. if (ret > 0 && id)
  647. *id = pipeid;
  648. return ret;
  649. }
  /*
   * Name callback for create_dir()/create_file()/create_handle(): appends a
   * random 8-digit hexadecimal suffix to the string already in `path`
   * (capacity `size`) and stores the suffix in *id (an unsigned int).
   *
   * Returns the total length of the path, -EACCES when not enough random
   * bytes are available, or -ERANGE when the suffix does not fit.
   *
   * NOTE(review): the prefix is measured with strlen() on every call, so a
   * retry appends to the previously generated name -- confirm this is wanted.
   */
  static int name_path(char *path, size_t size, void *id)
  {
      unsigned int suffix;
      size_t prefix_len = strlen(path);
      int len;

      /* no room at all after the prefix; also keeps the subtraction below
         from wrapping around */
      if (prefix_len >= size)
          return -ERANGE;

      size_t room = size - prefix_len;

      if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
          return -EACCES;

      /* truncation is judged against the space left after the prefix, not
         against the size of the whole buffer */
      len = snprintf(path + prefix_len, room, "%08x", suffix);
      if (len < 0 || (size_t) len >= room)
          return -ERANGE;

      *((unsigned int *) id) = suffix;
      return prefix_len + len;
  }
  663. static int open_dir (const char * path, void * obj)
  664. {
  665. struct shim_handle * dir = NULL;
  666. if (obj) {
  667. dir = get_new_handle();
  668. if (!dir)
  669. return -ENOMEM;
  670. }
  671. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  672. NULL);
  673. if (ret < 0)
  674. return ret = -EEXIST ? 1 : ret;
  675. if (obj)
  676. *((struct shim_handle **) obj) = dir;
  677. return 0;
  678. }
  /*
   * Create callback for create_file(): exclusively creates the file `path`
   * (read/write, mode 0600).
   *
   * Returns 0 on success, 1 when the path already exists (the caller then
   * tries another name), or a negative error code.  When `obj` is given, a
   * new handle is opened on the file and stored in *obj
   * (a struct shim_handle *).
   *
   * NOTE(review): the handle obtained from get_new_handle() is not released
   * when open_namei() fails -- confirm how it should be dropped.
   */
  static int open_file(const char *path, void *obj)
  {
      struct shim_handle *file = NULL;

      if (obj) {
          file = get_new_handle();
          if (!file)
              return -ENOMEM;
      }

      int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
                           NULL);
      if (ret < 0)
          /* only a name collision asks for a retry; the former
             "ret = -EEXIST ? 1 : ret" turned every error into a retry */
          return ret == -EEXIST ? 1 : ret;

      if (obj)
          *((struct shim_handle **) obj) = file;

      return 0;
  }
  695. static int open_pal_handle (const char * uri, void * obj)
  696. {
  697. PAL_HANDLE hdl;
  698. if (!memcmp(uri, "dir:", 4))
  699. hdl = DkStreamOpen(uri, 0,
  700. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  701. PAL_SHARE_OWNER_R,
  702. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  703. 0);
  704. else
  705. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  706. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  707. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  708. 0);
  709. if (!hdl) {
  710. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  711. return 0;
  712. else
  713. return -PAL_ERRNO;
  714. }
  715. if (obj)
  716. *((PAL_HANDLE *) obj) = hdl;
  717. return 0;
  718. }
  /*
   * Output callback for the path-based creators: copies the finished `path`
   * into `qstr` when one is given.  `size` and `id` are not used.
   * Returns the length of `path`.
   */
  static int output_path(char *path, size_t size, const void *id,
                         struct shim_qstr *qstr)
  {
      int path_len = strlen(path);

      if (qstr != NULL)
          qstrsetstr(qstr, path, path_len);

      return path_len;
  }
  727. int create_dir (const char * prefix, char * path, size_t size,
  728. struct shim_handle ** hdl)
  729. {
  730. unsigned int suffix;
  731. if (prefix) {
  732. int len = strlen(prefix);
  733. if (len >= size)
  734. return -ERANGE;
  735. memcpy(path, prefix, len + 1);
  736. }
  737. return create_unique(&name_path, &open_dir, &output_path, path, size,
  738. &suffix, hdl, NULL);
  739. }
  740. int create_file (const char * prefix, char * path, size_t size,
  741. struct shim_handle ** hdl)
  742. {
  743. unsigned int suffix;
  744. if (prefix) {
  745. int len = strlen(prefix);
  746. if (len >= size)
  747. return -ERANGE;
  748. memcpy(path, prefix, len + 1);
  749. }
  750. return create_unique(&name_path, &open_file, &output_path, path, size,
  751. &suffix, hdl, NULL);
  752. }
  753. int create_handle (const char * prefix, char * uri, size_t size,
  754. PAL_HANDLE * hdl, unsigned int * id)
  755. {
  756. unsigned int suffix;
  757. if (prefix) {
  758. int len = strlen(prefix);
  759. if (len >= size)
  760. return -ERANGE;
  761. memcpy(uri, prefix, len + 1);
  762. }
  763. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  764. id ? : &suffix, hdl, NULL);
  765. }
  #ifdef PROFILE
  /*
   * Print to `hdl` every enabled profile whose parent is `root`, prefixing
   * each line with one " " per `level`.  Categories are printed with their
   * children through a recursive call at level + 1.  After the entries, a
   * total over the interval profiles of this level is printed when at least
   * one interval was recorded.
   */
  static void print_profile_result(PAL_HANDLE hdl, struct shim_profile *root,
                                   int level)
  {
      unsigned long total_interval_time = 0;
      unsigned long total_interval_count = 0;

      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile *profile = &PROFILES[i];

          if (profile->root != root || profile->disabled)
              continue;

          switch (profile->type) {
              case OCCURENCE: {
                  unsigned int count =
                      atomic_read(&profile->val.occurence.count);
                  if (count) {
                      for (int j = 0 ; j < level ; j++)
                          __sys_fprintf(hdl, " ");
                      __sys_fprintf(hdl, "- %s: %u times\n", profile->name,
                                    count);
                  }
                  break;
              }

              case INTERVAL: {
                  unsigned int count =
                      atomic_read(&profile->val.interval.count);
                  if (count) {
                      unsigned long time =
                          atomic_read(&profile->val.interval.time);
                      unsigned long ind_time = time / count;
                      total_interval_time += time;
                      total_interval_count += count;
                      for (int j = 0 ; j < level ; j++)
                          __sys_fprintf(hdl, " ");
                      __sys_fprintf(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
                                    time, profile->name, count, ind_time);
                  }
                  break;
              }

              case CATAGORY:
                  for (int j = 0 ; j < level ; j++)
                      __sys_fprintf(hdl, " ");
                  __sys_fprintf(hdl, "- %s:\n", profile->name);
                  print_profile_result(hdl, profile, level + 1);
                  break;
          }
      }

      if (total_interval_count) {
          /* both totals are unsigned long, so they need %lu conversions */
          __sys_fprintf(hdl, "- (%11.11lu) total: %lu times, %lu msec\n",
                        total_interval_time, total_interval_count,
                        total_interval_time / total_interval_count);
      }
  }
  #endif /* PROFILE */
  818. static struct shim_atomic in_terminate = { .counter = 0, };
  819. int shim_terminate (void)
  820. {
  821. debug("teminating the whole process\n");
  822. /* do last clean-up of the process */
  823. shim_clean();
  824. DkProcessExit(0);
  825. return 0;
  826. }
  827. int shim_clean (void)
  828. {
  829. /* preventing multiple cleanup, this is mostly caused by
  830. assertion in shim_clean */
  831. atomic_inc(&in_terminate);
  832. if (atomic_read(&in_terminate) > 1)
  833. return 0;
  834. store_all_msg_persist();
  835. #ifdef PROFILE
  836. if (ENTER_TIME) {
  837. switch (SHIM_GET_TLS()->context.syscall_nr) {
  838. case __NR_exit_group:
  839. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  840. break;
  841. case __NR_exit:
  842. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  843. break;
  844. }
  845. }
  846. if (ipc_cld_profile_send()) {
  847. master_lock();
  848. PAL_HANDLE hdl = __open_shim_stdio();
  849. if (hdl) {
  850. __sys_fprintf(hdl, "******************************\n");
  851. __sys_fprintf(hdl, "profiling:\n");
  852. print_profile_result(hdl, &profile_root, 0);
  853. __sys_fprintf(hdl, "******************************\n");
  854. }
  855. master_unlock();
  856. }
  857. #endif
  858. del_all_ipc_ports(0);
  859. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  860. DkObjectClose(shim_stdio);
  861. shim_stdio = NULL;
  862. debug("process %u successfully terminated\n", cur_process.vmid);
  863. master_lock();
  864. DkProcessExit(cur_process.exit_code);
  865. return 0;
  866. }
  867. int message_confirm (const char * message, const char * options)
  868. {
  869. char answer;
  870. int noptions = strlen(options);
  871. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  872. int ret = 0;
  873. *(str++) = ' ';
  874. *(str++) = '[';
  875. for (int i = 0 ; i < noptions ; i++) {
  876. *(str++) = options[i];
  877. *(str++) = '/';
  878. }
  879. str--;
  880. *(str++) = ']';
  881. *(str++) = ' ';
  882. master_lock();
  883. PAL_HANDLE hdl = __open_shim_stdio();
  884. if (!hdl) {
  885. master_unlock();
  886. return -EACCES;
  887. }
  888. #define WRITE(buf, len) \
  889. ({ int _ret = DkStreamWrite(hdl, 0, len, buf, NULL); \
  890. _ret ? : -PAL_ERRNO; })
  891. #define READ(buf, len) \
  892. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  893. _ret ? : -PAL_ERRNO; })
  894. if ((ret = WRITE(message, strlen(message))) < 0)
  895. goto out;
  896. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  897. goto out;
  898. if ((ret = READ(&answer, 1)) < 0)
  899. goto out;
  900. out:
  901. master_unlock();
  902. return (ret < 0) ? ret : answer;
  903. }