shim_init.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_tls.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <pal.h>
  30. #include <pal_debug.h>
  31. #include <pal_error.h>
  32. #include <sys/mman.h>
  33. #include <asm/unistd.h>
  34. #include <asm/fcntl.h>
/* Allocation alignment taken from PAL_CB(alloc_align); all three values are
   filled in by shim_init(). */
unsigned long allocsize;
/* allocsize - 1.  Despite the name this is assigned as a bit mask of the
   offset inside one allocation unit, not a shift count. */
unsigned long allocshift;
/* ~allocshift */
unsigned long allocmask;

/* The following constant helps matching the glibc version with compatible
   SHIM libraries */
#include "glibc-version.h"

const unsigned int glibc_version = GLIBC_VERSION;
  42. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  43. {
  44. SHIM_GET_TLS()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  45. }
  46. void __assert_fail (const char * assertion, const char * file,
  47. unsigned int line, const char * function)
  48. {
  49. __sys_printf("assert failed %s:%d %s\n", file, line, assertion);
  50. pause();
  51. shim_terminate();
  52. }
  53. void __stack_chk_fail (void)
  54. {
  55. }
  56. static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
  57. /* reserved */ 0,
  58. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  59. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  60. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  61. /* PAL_ERROR_INVAL */ EINVAL,
  62. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  63. /* PAL_ERROR_DENIED */ EACCES,
  64. /* PAL_ERROR_BADHANDLE */ EFAULT,
  65. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  66. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  67. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  68. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  69. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  70. /* PAL_ERROR_INTERRUPTED */ EINTR,
  71. /* PAL_ERROR_OVERFLOW */ EFAULT,
  72. /* PAL_ERROR_BADADDR */ EFAULT,
  73. /* PAL_ERROR_NOMEM */ ENOMEM,
  74. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  75. /* PAL_ERROR_INCONSIST */ EFAULT,
  76. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  77. /* PAL_ERROR_ENDOFSTREAM */ 0,
  78. /* PAL_ERROR_NOTSERVER */ EINVAL,
  79. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  80. /* PAL_ERROR_ZEROSIZE */ 0,
  81. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  82. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  83. };
  84. long convert_pal_errno (long err)
  85. {
  86. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  87. pal_errno_to_unix_errno[err] : 0;
  88. }
  89. unsigned long parse_int (const char * str)
  90. {
  91. unsigned long num = 0;
  92. int radix = 10;
  93. char c;
  94. if (str[0] == '0') {
  95. str++;
  96. radix = 8;
  97. if (str[0] == 'x') {
  98. str++;
  99. radix = 16;
  100. }
  101. }
  102. while ((c = *(str++))) {
  103. int val;
  104. if (c >= 'A' && c <= 'F')
  105. val = c - 'A' + 10;
  106. else if (c >= 'a' && c <= 'f')
  107. val = c - 'a' + 10;
  108. else if (c >= '0' && c <= '9')
  109. val = c - '0';
  110. else
  111. break;
  112. if (val >= radix)
  113. break;
  114. num = num * radix + val;
  115. }
  116. if (c == 'G' || c == 'g')
  117. num *= 1024 * 1024 * 1024;
  118. else if (c == 'M' || c == 'm')
  119. num *= 1024 * 1024;
  120. else if (c == 'K' || c == 'k')
  121. num *= 1024;
  122. return num;
  123. }
  124. long int glibc_option (const char * opt)
  125. {
  126. char cfg[CONFIG_MAX];
  127. if (strcmp_static(opt, "heap_size")) {
  128. int ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
  129. if (ret < 0) {
  130. debug("no glibc option: %s (err=%d)\n", opt, ret);
  131. return -ENOENT;
  132. }
  133. long int heap_size = parse_int(cfg);
  134. debug("glibc option: heap_size = %ld\n", heap_size);
  135. return (long int) heap_size;
  136. }
  137. return -EINVAL;
  138. }
/* NOTE(review): the migrated_* pointers are not assigned in this part of the
   file; presumably they describe memory received during migration -- confirm
   against the checkpoint code. */
void * migrated_memory_start;
void * migrated_memory_end;
void * migrated_shim_addr;

/* Set by shim_init(): the address just past the auxiliary vector and the zero
   word that follows it; handed back through `return_stack`. */
void * initial_stack;
/* Environment vector; init_stack() replaces it with the copy placed on the
   new user stack. */
const char ** initial_envp __attribute_migratable;
/* NULL-terminated list built by read_environs() from LD_LIBRARY_PATH. */
const char ** library_paths;

/* Created in shim_init() with create_lock(). */
LOCKTYPE __master_lock;
/* NOTE(review): not written in this part of the file; presumably tells
   whether locking is active yet -- confirm. */
bool lock_enabled;
  147. void init_tcb (shim_tcb_t * tcb)
  148. {
  149. tcb->canary = SHIM_TLS_CANARY;
  150. tcb->self = tcb;
  151. }
  152. void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
  153. {
  154. memset(new_tcb, 0, sizeof(shim_tcb_t));
  155. new_tcb->canary = SHIM_TLS_CANARY;
  156. new_tcb->self = new_tcb;
  157. new_tcb->tp = old_tcb->tp;
  158. memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
  159. new_tcb->tid = old_tcb->tid;
  160. new_tcb->debug_buf = old_tcb->debug_buf;
  161. }
  162. /* This function is used to allocate tls before interpreter start running */
  163. void allocate_tls (void * tcb_location, bool user, struct shim_thread * thread)
  164. {
  165. __libc_tcb_t * tcb = tcb_location;
  166. assert(tcb);
  167. tcb->tcb = tcb;
  168. init_tcb(&tcb->shim_tcb);
  169. if (thread) {
  170. thread->tcb = tcb;
  171. thread->user_tcb = user;
  172. tcb->shim_tcb.tp = thread;
  173. tcb->shim_tcb.tid = thread->tid;
  174. } else {
  175. tcb->shim_tcb.tp = NULL;
  176. tcb->shim_tcb.tid = 0;
  177. }
  178. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  179. assert(SHIM_TLS_CHECK_CANARY());
  180. }
  181. void populate_tls (void * tcb_location, bool user)
  182. {
  183. __libc_tcb_t * tcb = (__libc_tcb_t *) tcb_location;
  184. assert(tcb);
  185. tcb->tcb = tcb;
  186. copy_tcb(&tcb->shim_tcb, SHIM_GET_TLS());
  187. struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
  188. if (thread) {
  189. thread->tcb = tcb;
  190. thread->user_tcb = user;
  191. }
  192. DkSegmentRegister(PAL_SEGMENT_FS, tcb);
  193. assert(SHIM_TLS_CHECK_CANARY());
  194. }
  195. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  196. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  197. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  198. void * allocate_stack (size_t size, size_t protect_size, bool user)
  199. {
  200. size = ALIGN_UP(size);
  201. protect_size = ALIGN_UP(protect_size);
  202. /* preserve a non-readable, non-writeable page below the user
  203. stack to stop user program to clobber other vmas */
  204. void * stack = user ?
  205. get_unmapped_vma(size + protect_size, STACK_FLAGS) :
  206. NULL;
  207. if (user)
  208. stack = (void *) DkVirtualMemoryAlloc(stack, size + protect_size,
  209. 0, PAL_PROT_READ|PAL_PROT_WRITE);
  210. else
  211. stack = system_malloc(size + protect_size);
  212. if (!stack)
  213. return NULL;
  214. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  215. INC_PROFILE_OCCURENCE(alloc_stack_count);
  216. if (protect_size &&
  217. !DkVirtualMemoryProtect(stack, protect_size, PAL_PROT_NONE))
  218. return NULL;
  219. stack += protect_size;
  220. if (user) {
  221. if (bkeep_mmap(stack, size, PROT_READ|PROT_WRITE,
  222. STACK_FLAGS, NULL, 0, "stack") < 0)
  223. return NULL;
  224. if (protect_size &&
  225. bkeep_mmap(stack - protect_size, protect_size, 0,
  226. STACK_FLAGS, NULL, 0, NULL) < 0)
  227. return NULL;
  228. }
  229. debug("allocated stack at %p (size = %d)\n", stack, size);
  230. return stack;
  231. }
/*
 * populate_user_stack: copies argv, envp and the auxiliary vector onto a
 * freshly allocated stack.
 *
 * stack, stack_size: the stack region [stack, stack + stack_size)
 * nauxv, auxpp:      number of auxv entries and in/out pointer to them
 * argvp, envpp:      in/out pointers to the argument / environment vectors
 *
 * The region is filled from both ends: pointer arrays grow upwards from
 * `stack` (ALLOCATE_BOTTOM) while string bodies, a zero word and the auxv
 * copy grow downwards from the top (ALLOCATE_TOP).  At the end the pointer
 * arrays are moved up so that they sit directly below the data allocated
 * from the top.  Resulting layout, low to high address:
 *     argv[] NULL envp[] NULL auxv[nauxv] 0UL strings
 *
 * On success returns 0 and updates *argvp / *envpp (only if the respective
 * vector was copied) and *auxpp (only if nauxv is non-zero).  Returns -ENOMEM
 * when the two ends would cross.
 */
int populate_user_stack (void * stack, size_t stack_size,
                         int nauxv, elf_auxv_t ** auxpp,
                         const char *** argvp, const char *** envpp)
{
    const char ** argv = *argvp, ** envp = *envpp;
    const char ** new_argv = NULL, ** new_envp = NULL;
    void * stack_bottom = stack;
    void * stack_top = stack + stack_size;

/* Both macros return from the enclosing function with -ENOMEM on overflow.
   ALLOCATE_TOP yields the new (lowered) top, ALLOCATE_BOTTOM yields the start
   of the space just reserved at the bottom. */
#define ALLOCATE_TOP(size)      \
    ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM;  \
       stack_top; })

#define ALLOCATE_BOTTOM(size)   \
    ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM;  \
       stack_bottom - (size); })

    /* no argv: store an empty (NULL-only) vector and continue with envp by
       jumping into the loop body */
    if (!argv) {
        *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
        goto copy_envp;
    }

    new_argv = stack_bottom;
    /* the loop body runs once for argv and, if present, once more for envp
       (envp is moved into `argv` at the end of the first pass) */
    while (argv) {
        for (const char ** a = argv ; *a ; a++) {
            /* pointer slot at the bottom, string body at the top */
            const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
            int len = strlen(*a) + 1;
            char * abuf = ALLOCATE_TOP(len);
            memcpy(abuf, *a, len);
            *t = abuf;
        }

        /* NULL terminator of the vector just copied */
        *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
copy_envp:
        if (!envp)
            break;
        new_envp = stack_bottom;
        argv = envp;
        envp = NULL;
    }

    /* no envp was copied: still emit a NULL-only environment vector */
    if (!new_envp)
        *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;

    /* round the end of the pointer area down to a multiple of 8 */
    stack_bottom = (void *) ((unsigned long) stack_bottom & ~7UL);
    /* zero word placed between the strings and the auxv copy */
    *((unsigned long *) ALLOCATE_TOP(sizeof(unsigned long))) = 0;

    if (nauxv) {
        elf_auxv_t * old_auxp = *auxpp;
        *auxpp = ALLOCATE_TOP(sizeof(elf_auxv_t) * nauxv);
        if (old_auxp)
            memcpy(*auxpp, old_auxp, nauxv * sizeof(elf_auxv_t));
    }

    /* slide the pointer arrays up against the data allocated from the top;
       every pointer into them moves by (stack_top - stack_bottom) */
    memmove(stack_top - (stack_bottom - stack), stack, stack_bottom - stack);
    if (new_argv)
        *argvp = (void *) new_argv + (stack_top - stack_bottom);
    if (new_envp)
        *envpp = (void *) new_envp + (stack_top - stack_bottom);
    return 0;
}
  284. unsigned long sys_stack_size = 0;
  285. int init_stack (const char ** argv, const char ** envp, const char *** argpp,
  286. int nauxv, elf_auxv_t ** auxpp)
  287. {
  288. if (!sys_stack_size) {
  289. sys_stack_size = DEFAULT_SYS_STACK_SIZE;
  290. if (root_config) {
  291. char stack_cfg[CONFIG_MAX];
  292. if (get_config(root_config, "sys.stack.size", stack_cfg,
  293. CONFIG_MAX) > 0)
  294. sys_stack_size = ALIGN_UP(parse_int(stack_cfg));
  295. }
  296. }
  297. struct shim_thread * cur_thread = get_cur_thread();
  298. if (!cur_thread || cur_thread->stack)
  299. return 0;
  300. void * stack = allocate_stack(sys_stack_size, allocsize, true);
  301. if (!stack)
  302. return -ENOMEM;
  303. if (initial_envp)
  304. envp = initial_envp;
  305. int ret = populate_user_stack(stack, sys_stack_size,
  306. nauxv, auxpp, &argv, &envp);
  307. if (ret < 0)
  308. return ret;
  309. *argpp = argv;
  310. initial_envp = envp;
  311. cur_thread->stack_top = stack + sys_stack_size;
  312. cur_thread->stack = stack;
  313. cur_thread->stack_red = stack - allocsize;
  314. return 0;
  315. }
  316. int read_environs (const char ** envp)
  317. {
  318. for (const char ** e = envp ; *e ; e++) {
  319. switch ((*e)[0]) {
  320. case 'L': {
  321. if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
  322. const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
  323. int npaths = 0;
  324. for (const char * tmp = s ; *tmp ; tmp++)
  325. if (*tmp == ':')
  326. npaths++;
  327. const char ** paths = malloc(sizeof(const char *) *
  328. (npaths + 1));
  329. if (!paths)
  330. return -ENOMEM;
  331. int cnt = 0;
  332. while (*s) {
  333. const char * next;
  334. for (next = s ; *next && *next != ':' ; next++);
  335. int len = next - s;
  336. char * str = malloc(len + 1);
  337. if (!str)
  338. return -ENOMEM;
  339. memcpy(str, s, len);
  340. str[len] = 0;
  341. paths[cnt++] = str;
  342. s = *next ? next + 1 : next;
  343. }
  344. paths[cnt] = NULL;
  345. library_paths = paths;
  346. break;
  347. }
  348. break;
  349. }
  350. }
  351. }
  352. return 0;
  353. }
  354. struct config_store * root_config = NULL;
  355. static void * __malloc (int size)
  356. {
  357. return malloc(size);
  358. }
  359. static void __free (void * mem)
  360. {
  361. free(mem);
  362. }
  363. int init_manifest (PAL_HANDLE manifest_handle)
  364. {
  365. void * addr;
  366. unsigned int size;
  367. if (PAL_CB(manifest_preload.start)) {
  368. addr = PAL_CB(manifest_preload.start);
  369. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  370. } else {
  371. PAL_STREAM_ATTR attr;
  372. if (!DkStreamAttributesQuerybyHandle(manifest_handle, &attr))
  373. return -PAL_ERRNO;
  374. size = attr.pending_size;
  375. addr = (void *) DkStreamMap(manifest_handle, NULL,
  376. PAL_PROT_READ, 0,
  377. ALIGN_UP(size));
  378. if (!addr)
  379. return -PAL_ERRNO;
  380. }
  381. bkeep_mmap(addr, ALIGN_UP(size), PROT_READ,
  382. MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL, NULL, 0,
  383. "manifest");
  384. root_config = malloc(sizeof(struct config_store));
  385. root_config->raw_data = addr;
  386. root_config->raw_size = size;
  387. root_config->malloc = __malloc;
  388. root_config->free = __free;
  389. const char * errstring = "Unexpected error";
  390. int ret = 0;
  391. if ((ret = read_config(root_config, NULL, &errstring)) < 0) {
  392. root_config = NULL;
  393. sys_printf("Unable to read manifest file: %s\n", errstring);
  394. return ret;
  395. }
  396. return 0;
  397. }
#ifdef PROFILE
/* Only defined in PROFILE builds.
   NOTE(review): presumably the root node of the profile tree -- confirm
   against shim_profile.h. */
struct shim_profile profile_root;
#endif
  401. # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
  402. do { \
  403. void *_tmp = (cookie); \
  404. (argv) = _tmp; \
  405. _tmp += sizeof(char *) * ((argc) + 1); \
  406. (envp) = _tmp; \
  407. for ( ; *(char **) _tmp; _tmp += sizeof(char *)); \
  408. (auxp) = _tmp + sizeof(char *); \
  409. } while (0)
  410. static void * __process_auxv (elf_auxv_t * auxp)
  411. {
  412. elf_auxv_t * av;
  413. for (av = auxp; av->a_type != AT_NULL; av++)
  414. switch (av->a_type) {
  415. default: break;
  416. }
  417. return av + 1;
  418. }
  419. #define FIND_LAST_STACK(stack) \
  420. do { \
  421. /* check if exist a NULL end */ \
  422. assert(*(uint64_t *) stack == 0); \
  423. stack += sizeof(uint64_t); \
  424. } while (0)
  425. #ifdef PROFILE
  426. static void set_profile_enabled (const char ** envp)
  427. {
  428. const char ** p;
  429. for (p = envp ; (*p) ; p++)
  430. if (strpartcmp_static(*p, "PROFILE_ENABLED="))
  431. break;
  432. if (!(*p))
  433. return;
  434. for (int i = 0 ; i < N_PROFILE ; i++)
  435. PROFILES[i].disabled = true;
  436. const char * str = (*p) + 16;
  437. bool enabled = false;
  438. while (*str) {
  439. const char * next = str;
  440. for ( ; (*next) && (*next) != ',' ; next++);
  441. if (next > str) {
  442. int len = next - str;
  443. for (int i = 0 ; i < N_PROFILE ; i++) {
  444. struct shim_profile * profile = &PROFILES[i];
  445. if (!memcmp(profile->name, str, len) && !profile->name[len]) {
  446. profile->disabled = false;
  447. if (profile->type == CATAGORY)
  448. enabled = true;
  449. }
  450. }
  451. }
  452. str = (*next) ? next + 1 : next;
  453. }
  454. while (enabled) {
  455. enabled = false;
  456. for (int i = 0 ; i < N_PROFILE ; i++) {
  457. struct shim_profile * profile = &PROFILES[i];
  458. if (!profile->disabled || profile->root == &profile_)
  459. continue;
  460. if (!profile->root->disabled) {
  461. profile->disabled = false;
  462. if (profile->type == CATAGORY)
  463. enabled = true;
  464. }
  465. }
  466. }
  467. for (int i = 0 ; i < N_PROFILE ; i++) {
  468. struct shim_profile * profile = &PROFILES[i];
  469. if (profile->type == CATAGORY || profile->disabled)
  470. continue;
  471. for (profile = profile->root ;
  472. profile != &profile_ && profile->disabled ;
  473. profile = profile->root)
  474. profile->disabled = false;
  475. }
  476. }
  477. #endif
  478. static int init_newproc (struct newproc_header * hdr)
  479. {
  480. BEGIN_PROFILE_INTERVAL();
  481. int bytes = DkStreamRead(PAL_CB(parent_process), 0,
  482. sizeof(struct newproc_header), hdr,
  483. NULL, 0);
  484. if (!bytes)
  485. return -PAL_ERRNO;
  486. SAVE_PROFILE_INTERVAL(child_wait_header);
  487. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  488. return hdr->failure;
  489. }
/* Profile category "pal": the intervals below are filled in at the end of
   shim_init() from the timing fields of pal_control. */
DEFINE_PROFILE_CATAGORY(pal, );
DEFINE_PROFILE_INTERVAL(pal_startup_time,               pal);
DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
DEFINE_PROFILE_INTERVAL(pal_relocation_time,            pal);
DEFINE_PROFILE_INTERVAL(pal_linking_time,               pal);
DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time,      pal);
DEFINE_PROFILE_INTERVAL(pal_allocation_time,            pal);
DEFINE_PROFILE_INTERVAL(pal_tail_startup_time,          pal);
DEFINE_PROFILE_INTERVAL(pal_child_creation_time,        pal);

/* Profile category "init": one interval per initialization step.  RUN_INIT()
   saves into the interval that carries the same name as the function it
   calls. */
DEFINE_PROFILE_CATAGORY(init, );
DEFINE_PROFILE_INTERVAL(init_randgen,               init);
DEFINE_PROFILE_INTERVAL(init_heap,                  init);
DEFINE_PROFILE_INTERVAL(init_slab,                  init);
DEFINE_PROFILE_INTERVAL(init_str_mgr,               init);
DEFINE_PROFILE_INTERVAL(init_internal_map,          init);
DEFINE_PROFILE_INTERVAL(init_vma,                   init);
DEFINE_PROFILE_INTERVAL(init_fs,                    init);
DEFINE_PROFILE_INTERVAL(init_dcache,                init);
DEFINE_PROFILE_INTERVAL(init_handle,                init);
DEFINE_PROFILE_INTERVAL(read_from_checkpoint,       init);
DEFINE_PROFILE_INTERVAL(read_from_file,             init);
DEFINE_PROFILE_INTERVAL(init_newproc,               init);
DEFINE_PROFILE_INTERVAL(init_mount_root,            init);
DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file,  init);
DEFINE_PROFILE_INTERVAL(restore_from_file,          init);
DEFINE_PROFILE_INTERVAL(init_manifest,              init);
DEFINE_PROFILE_INTERVAL(init_ipc,                   init);
DEFINE_PROFILE_INTERVAL(init_thread,                init);
DEFINE_PROFILE_INTERVAL(init_important_handles,     init);
DEFINE_PROFILE_INTERVAL(init_mount,                 init);
DEFINE_PROFILE_INTERVAL(init_async,                 init);
DEFINE_PROFILE_INTERVAL(init_stack,                 init);
DEFINE_PROFILE_INTERVAL(read_environs,              init);
DEFINE_PROFILE_INTERVAL(init_loader,                init);
DEFINE_PROFILE_INTERVAL(init_ipc_helper,            init);
DEFINE_PROFILE_INTERVAL(init_signal,                init);
/* CALL_INIT: plain call of `func` with the given arguments. */
#define CALL_INIT(func, args ...) func(args)

/*
 * RUN_INIT: runs one initialization step.
 *
 * Calls `func` with the remaining arguments.  A negative result is logged
 * with the function name and the process is terminated via shim_terminate().
 * Afterwards the elapsed time is saved into the profile interval named after
 * `func`.
 */
#define RUN_INIT(func, ...)                                             \
    do {                                                                \
        int _err = CALL_INIT(func, ##__VA_ARGS__);                      \
        if (_err < 0) {                                                 \
            debug("initialization failed in " #func " (%d)\n", _err);   \
            shim_terminate();                                           \
        }                                                               \
        SAVE_PROFILE_INTERVAL(func);                                    \
    } while (0)
/* Event that is created when a checkpoint is restored and set at the end of
   shim_init(). */
extern PAL_HANDLE thread_start_event;

/*
 * shim_init: entry point of the library OS.
 *
 * argc:         number of arguments
 * args:         start-up block holding argv, envp and the auxiliary vector
 *               (decoded with FIND_ARG_COMPONENTS)
 * return_stack: receives initial_stack when the function returns
 *
 * Sets up a temporary TCB, runs every initialization step through RUN_INIT
 * (which terminates the process when a step fails), optionally restores a
 * checkpoint -- either from a file given as "-resume <file>" or received from
 * the parent process -- and finally either restores a saved context or
 * executes the ELF object of the current thread.
 *
 * Returns 0 if control comes back from those final steps.
 */
int shim_init (int argc, void * args, void ** return_stack)
{
    debug_handle = PAL_CB(debug_stream);
    cur_process.vmid = (IDTYPE) PAL_CB(process_id);

    /* create the initial TCB, shim can not be run without a tcb */
    __libc_tcb_t tcb;
    memset(&tcb, 0, sizeof(__libc_tcb_t));
    allocate_tls(&tcb, false, NULL);
    debug_setbuf(&tcb.shim_tcb, true);
    debug("set tcb to %p\n", &tcb);

#ifdef PROFILE
    unsigned long begin_time = GET_PROFILE_INTERVAL();
#endif

    /* PAL failures are recorded in the TLS by handle_failure() */
    DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE, 0);

    allocsize = PAL_CB(alloc_align);
    allocshift = allocsize - 1;
    allocmask = ~allocshift;

    create_lock(__master_lock);

    const char ** argv, ** envp, ** argp = NULL;
    elf_auxv_t * auxp;

    /* call to figure out where the arguments are */
    FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
    /* initial_stack first points right after the AT_NULL entry, so nauxv
       counts the terminator as well; FIND_LAST_STACK then skips the zero
       word that follows */
    initial_stack = __process_auxv(auxp);
    int nauxv = (elf_auxv_t *) initial_stack - auxp;
    FIND_LAST_STACK(initial_stack);

#ifdef PROFILE
    set_profile_enabled(envp);
#endif

    struct newproc_header hdr;
    void * cpaddr = NULL;
#ifdef PROFILE
    unsigned long begin_create_time = 0;
#endif

    BEGIN_PROFILE_INTERVAL();
    RUN_INIT(init_randgen);
    RUN_INIT(init_heap);
    RUN_INIT(init_slab);
    RUN_INIT(read_environs, envp);
    RUN_INIT(init_str_mgr);
    RUN_INIT(init_internal_map);
    RUN_INIT(init_vma);
    RUN_INIT(init_fs);
    RUN_INIT(init_dcache);
    RUN_INIT(init_handle);

    debug("shim loaded at %p, ready to initialize\n", &__load_address);

    /* "-resume <file>": restore from a checkpoint file; the two arguments
       are consumed before jumping to the restore step */
    if (argc && argv[0][0] == '-') {
        if (strcmp_static(argv[0], "-resume") && argc >= 2) {
            const char * filename = *(argv + 1);
            argc -= 2;
            argv += 2;
            RUN_INIT(init_mount_root);
            RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
                     &cpaddr);
            goto restore;
        }
    }

    /* started by another process: read its header and, if it carries a
       checkpoint, receive the migrated state */
    if (PAL_CB(parent_process)) {
        RUN_INIT(init_newproc, &hdr);
        SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
                                  hdr.create_time, begin_time);
#ifdef PROFILE
        begin_create_time = hdr.begin_create_time;
#endif

        if (hdr.checkpoint.hdr.size)
            RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
    }

    if (cpaddr) {
restore:
        /* also reached directly from the "-resume" path above */
        thread_start_event = DkNotificationEventCreate(PAL_FALSE);
        RUN_INIT(restore_checkpoint,
                 &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
                 (ptr_t) cpaddr, 0);
    }

    if (PAL_CB(manifest_handle))
        RUN_INIT(init_manifest, PAL_CB(manifest_handle));

    RUN_INIT(init_mount_root);
    RUN_INIT(init_ipc);
    RUN_INIT(init_thread);
    RUN_INIT(init_mount);
    RUN_INIT(init_important_handles);
    RUN_INIT(init_async);
    RUN_INIT(init_stack, argv, envp, &argp, nauxv, &auxp);
    RUN_INIT(init_loader);
    RUN_INIT(init_ipc_helper);
    RUN_INIT(init_signal);

    debug("shim process initialized\n");

#ifdef PROFILE
    if (begin_create_time)
        SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
                                    begin_create_time);
#endif

    /* import the start-up timings measured by the PAL */
    SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
                              pal_control.host_specific_startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
                              pal_control.relocation_time);
    SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
    SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
                              pal_control.manifest_loading_time);
    SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
                              pal_control.allocation_time);
    SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
                              pal_control.tail_startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
                              pal_control.child_creation_time);

    if (thread_start_event)
        DkEventSet(thread_start_event);

    shim_tcb_t * cur_tcb = SHIM_GET_TLS();
    struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;

    /* a saved stack pointer in the TCB context means there is a context to
       resume */
    if (cur_tcb->context.sp)
        restore_context(&cur_tcb->context);

    if (cur_thread->exec)
        execute_elf_object(cur_thread->exec,
                           argc, argp, nauxv, auxp);

    *return_stack = initial_stack;
    return 0;
}
  654. static int create_unique (int (*mkname) (char *, size_t, void *),
  655. int (*create) (const char *, void *),
  656. int (*output) (char *, size_t, const void *,
  657. struct shim_qstr *),
  658. char * name, size_t size, void * id, void * obj,
  659. struct shim_qstr * qstr)
  660. {
  661. int ret, len;
  662. while (1) {
  663. len = mkname(name, size, id);
  664. if (len < 0)
  665. return len;
  666. if ((ret = create(name, obj)) < 0)
  667. return ret;
  668. if (ret)
  669. continue;
  670. if (output)
  671. return output(name, size, id, qstr);
  672. if (qstr)
  673. qstrsetstr(qstr, name, len);
  674. return len;
  675. }
  676. }
  677. static int name_pipe (char * uri, size_t size, void * id)
  678. {
  679. IDTYPE pipeid;
  680. int len;
  681. if (getrand(&pipeid, sizeof(IDTYPE)) < sizeof(IDTYPE))
  682. return -EACCES;
  683. debug("creating pipe: pipe.srv:%u\n", pipeid);
  684. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  685. return -ERANGE;
  686. *((IDTYPE *) id) = pipeid;
  687. return len;
  688. }
  689. static int open_pipe (const char * uri, void * obj)
  690. {
  691. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  692. if (!pipe)
  693. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  694. -PAL_ERRNO;
  695. if (obj)
  696. *((PAL_HANDLE *) obj) = pipe;
  697. else
  698. DkObjectClose(pipe);
  699. return 0;
  700. }
  701. static int pipe_addr (char * uri, size_t size, const void * id,
  702. struct shim_qstr * qstr)
  703. {
  704. IDTYPE pipeid = *((IDTYPE *) id);
  705. int len;
  706. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  707. return -ERANGE;
  708. if (qstr)
  709. qstrsetstr(qstr, uri, len);
  710. return len;
  711. }
  712. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  713. struct shim_qstr * qstr)
  714. {
  715. IDTYPE pipeid;
  716. int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
  717. uri, size, &pipeid, hdl, qstr);
  718. if (ret > 0 && id)
  719. *id = pipeid;
  720. return ret;
  721. }
  722. static int name_path (char * path, size_t size, void * id)
  723. {
  724. unsigned int suffix;
  725. int prefix_len = strlen(path);
  726. int len;
  727. if (getrand(&suffix, sizeof(unsigned int)) < sizeof(unsigned int))
  728. return -EACCES;
  729. len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
  730. if (len == size)
  731. return -ERANGE;
  732. *((unsigned int *) id) = suffix;
  733. return prefix_len + len;
  734. }
  735. static int open_dir (const char * path, void * obj)
  736. {
  737. struct shim_handle * dir = NULL;
  738. if (obj) {
  739. dir = get_new_handle();
  740. if (!dir)
  741. return -ENOMEM;
  742. }
  743. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  744. NULL);
  745. if (ret < 0)
  746. return ret = -EEXIST ? 1 : ret;
  747. if (obj)
  748. *((struct shim_handle **) obj) = dir;
  749. return 0;
  750. }
  751. static int open_file (const char * path, void * obj)
  752. {
  753. struct shim_handle * file = NULL;
  754. if (obj) {
  755. file = get_new_handle();
  756. if (!file)
  757. return -ENOMEM;
  758. }
  759. int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
  760. NULL);
  761. if (ret < 0)
  762. return ret = -EEXIST ? 1 : ret;
  763. if (obj)
  764. *((struct shim_handle **) obj) = file;
  765. return 0;
  766. }
  767. static int open_pal_handle (const char * uri, void * obj)
  768. {
  769. PAL_HANDLE hdl;
  770. if (strpartcmp_static(uri, "dev:"))
  771. hdl = DkStreamOpen(uri, 0,
  772. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  773. PAL_SHARE_OWNER_R,
  774. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  775. 0);
  776. else
  777. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  778. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  779. PAL_CREAT_TRY|PAL_CREAT_ALWAYS,
  780. 0);
  781. if (!hdl) {
  782. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  783. return 0;
  784. else
  785. return -PAL_ERRNO;
  786. }
  787. if (obj)
  788. *((PAL_HANDLE *) obj) = hdl;
  789. return 0;
  790. }
  791. static int output_path (char * path, size_t size, const void * id,
  792. struct shim_qstr * qstr)
  793. {
  794. int len = strlen(path);
  795. if (qstr)
  796. qstrsetstr(qstr, path, len);
  797. return len;
  798. }
  799. int create_dir (const char * prefix, char * path, size_t size,
  800. struct shim_handle ** hdl)
  801. {
  802. unsigned int suffix;
  803. if (prefix) {
  804. int len = strlen(prefix);
  805. if (len >= size)
  806. return -ERANGE;
  807. memcpy(path, prefix, len + 1);
  808. }
  809. return create_unique(&name_path, &open_dir, &output_path, path, size,
  810. &suffix, hdl, NULL);
  811. }
  812. int create_file (const char * prefix, char * path, size_t size,
  813. struct shim_handle ** hdl)
  814. {
  815. unsigned int suffix;
  816. if (prefix) {
  817. int len = strlen(prefix);
  818. if (len >= size)
  819. return -ERANGE;
  820. memcpy(path, prefix, len + 1);
  821. }
  822. return create_unique(&name_path, &open_file, &output_path, path, size,
  823. &suffix, hdl, NULL);
  824. }
  825. int create_handle (const char * prefix, char * uri, size_t size,
  826. PAL_HANDLE * hdl, unsigned int * id)
  827. {
  828. unsigned int suffix;
  829. if (prefix) {
  830. int len = strlen(prefix);
  831. if (len >= size)
  832. return -ERANGE;
  833. memcpy(uri, prefix, len + 1);
  834. }
  835. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  836. id ? : &suffix, hdl, NULL);
  837. }
  838. void check_stack_hook (void)
  839. {
  840. struct shim_thread * cur_thread = get_cur_thread();
  841. void * rsp;
  842. asm volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  843. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  844. if (rsp - cur_thread->stack < PAL_CB(pagesize))
  845. sys_printf("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  846. rsp, cur_thread->stack, cur_thread->stack_top);
  847. } else {
  848. sys_printf("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  849. rsp, cur_thread->stack, cur_thread->stack_top);
  850. }
  851. }
  #ifdef PROFILE
  /* Write one indentation unit to `hdl` for each nesting level. */
  static void print_profile_indent (PAL_HANDLE hdl, int level)
  {
      for (int j = 0 ; j < level ; j++)
          __sys_fprintf(hdl, " ");
  }

  /*
   * Print every enabled entry of PROFILES[] whose parent is `root`, indented
   * by `level`, recursing into CATAGORY entries with `level + 1`.
   *
   * hdl:   stream the report is written to.
   * root:  parent profile whose direct children are printed.
   * level: current nesting depth, used only for indentation.
   *
   * OCCURENCE and INTERVAL entries are printed only when their count is
   * non-zero. After the loop, if any INTERVAL entry was printed, a summary
   * line with the accumulated time, accumulated count and their quotient is
   * written.
   */
  static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
                                    int level)
  {
      unsigned long total_interval_time = 0;
      unsigned long total_interval_count = 0;

      for (int i = 0 ; i < N_PROFILE ; i++) {
          struct shim_profile * profile = &PROFILES[i];

          if (profile->root != root || profile->disabled)
              continue;

          switch (profile->type) {
              case OCCURENCE: {
                  unsigned int count =
                      atomic_read(&profile->val.occurence.count);
                  if (count) {
                      print_profile_indent(hdl, level);
                      __sys_fprintf(hdl, "- %s: %u times\n", profile->name, count);
                  }
                  break;
              }
              case INTERVAL: {
                  unsigned int count =
                      atomic_read(&profile->val.interval.count);
                  if (count) {
                      unsigned long time =
                          atomic_read(&profile->val.interval.time);
                      unsigned long ind_time = time / count;
                      total_interval_time += time;
                      total_interval_count += count;
                      print_profile_indent(hdl, level);
                      __sys_fprintf(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
                                    time, profile->name, count, ind_time);
                  }
                  break;
              }
              case CATAGORY:
                  print_profile_indent(hdl, level);
                  __sys_fprintf(hdl, "- %s:\n", profile->name);
                  print_profile_result(hdl, profile, level + 1);
                  break;
              default:
                  break;
          }
      }

      if (total_interval_count) {
          /* both totals are unsigned long, so they need the `l` length
             modifier just like the per-entry line above */
          __sys_fprintf(hdl, " - (%11.11lu) total: %lu times, %lu msec\n",
                        total_interval_time, total_interval_count,
                        total_interval_time / total_interval_count);
      }
  }
  #endif /* PROFILE */
  904. static struct shim_atomic in_terminate = { .counter = 0, };
  905. int shim_terminate (void)
  906. {
  907. debug("teminating the whole process\n");
  908. /* do last clean-up of the process */
  909. shim_clean();
  910. DkProcessExit(0);
  911. return 0;
  912. }
  913. int shim_clean (void)
  914. {
  915. /* preventing multiple cleanup, this is mostly caused by
  916. assertion in shim_clean */
  917. atomic_inc(&in_terminate);
  918. if (atomic_read(&in_terminate) > 1)
  919. return 0;
  920. store_all_msg_persist();
  921. #ifdef PROFILE
  922. if (ENTER_TIME) {
  923. switch (SHIM_GET_TLS()->context.syscall_nr) {
  924. case __NR_exit_group:
  925. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  926. break;
  927. case __NR_exit:
  928. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  929. break;
  930. }
  931. }
  932. if (ipc_cld_profile_send()) {
  933. master_lock();
  934. PAL_HANDLE hdl = __open_shim_stdio();
  935. if (hdl) {
  936. __sys_fprintf(hdl, "******************************\n");
  937. __sys_fprintf(hdl, "profiling:\n");
  938. print_profile_result(hdl, &profile_root, 0);
  939. __sys_fprintf(hdl, "******************************\n");
  940. }
  941. master_unlock();
  942. }
  943. #endif
  944. del_all_ipc_ports(0);
  945. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  946. DkObjectClose(shim_stdio);
  947. shim_stdio = NULL;
  948. debug("process %u successfully terminated\n", cur_process.vmid);
  949. master_lock();
  950. DkProcessExit(cur_process.exit_code);
  951. return 0;
  952. }
  953. int message_confirm (const char * message, const char * options)
  954. {
  955. char answer;
  956. int noptions = strlen(options);
  957. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  958. int ret = 0;
  959. *(str++) = ' ';
  960. *(str++) = '[';
  961. for (int i = 0 ; i < noptions ; i++) {
  962. *(str++) = options[i];
  963. *(str++) = '/';
  964. }
  965. str--;
  966. *(str++) = ']';
  967. *(str++) = ' ';
  968. master_lock();
  969. PAL_HANDLE hdl = __open_shim_stdio();
  970. if (!hdl) {
  971. master_unlock();
  972. return -EACCES;
  973. }
  974. #define WRITE(buf, len) \
  975. ({ int _ret = DkStreamWrite(hdl, 0, len, (void *) buf, NULL); \
  976. _ret ? : -PAL_ERRNO; })
  977. #define READ(buf, len) \
  978. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  979. _ret ? : -PAL_ERRNO; })
  980. if ((ret = WRITE(message, strlen(message))) < 0)
  981. goto out;
  982. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  983. goto out;
  984. if ((ret = READ(&answer, 1)) < 0)
  985. goto out;
  986. out:
  987. master_unlock();
  988. return (ret < 0) ? ret : answer;
  989. }