shim_init.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_init.c
  17. *
  18. * This file contains entry and exit functions of library OS.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_table.h>
  22. #include <shim_tls.h>
  23. #include <shim_thread.h>
  24. #include <shim_handle.h>
  25. #include <shim_vma.h>
  26. #include <shim_checkpoint.h>
  27. #include <shim_fs.h>
  28. #include <shim_ipc.h>
  29. #include <shim_profile.h>
  30. #include <pal.h>
  31. #include <pal_debug.h>
  32. #include <pal_error.h>
  33. #include <sys/mman.h>
  34. #include <asm/unistd.h>
  35. #include <asm/fcntl.h>
/* Page-granularity allocation parameters, filled in from the PAL control
 * block in shim_init(): allocsize is the allocation alignment,
 * allocshift = allocsize - 1, allocmask = ~allocshift. */
unsigned long allocsize;
unsigned long allocshift;
unsigned long allocmask;

/* The following constants will help matching glibc version with compatible
   SHIM libraries */
#include "glibc-version.h"
const unsigned int glibc_version = GLIBC_VERSION;
  43. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  44. {
  45. shim_get_tls()->pal_errno = (arg <= PAL_ERROR_BOUND) ? arg : 0;
  46. }
/* Hard-abort the library OS. PAUSE() is invoked first (presumably to give
 * an attached debugger a chance to break in — confirm against its
 * definition), then the process is terminated with ENOTRECOVERABLE. */
noreturn void __abort(void) {
    PAUSE();
    shim_terminate(-ENOTRECOVERABLE);
}
/* printf-style warning helper: forwards the format string and variadic
 * arguments to the system printer. Note that __SYS_VPRINTF takes a
 * POINTER to the va_list (shim convention). */
void warn (const char *format, ...)
{
    va_list args;
    va_start (args, format);
    __SYS_VPRINTF(format, &args);
    va_end (args);
}
/* Hook referenced by -fstack-protector instrumentation on canary
 * mismatch; intentionally a no-op in the shim. */
void __stack_chk_fail (void)
{
}
/* Translation table from PAL error codes (index) to unix errno values;
 * consumed by convert_pal_errno() below. Entries mapped to 0 carry no
 * errno equivalent. */
static int pal_errno_to_unix_errno [PAL_ERROR_BOUND + 1] = {
    /* reserved */                  0,
    /* PAL_ERROR_NOTIMPLEMENTED */  ENOSYS,
    /* PAL_ERROR_NOTDEFINED */      ENOSYS,
    /* PAL_ERROR_NOTSUPPORT */      EACCES,
    /* PAL_ERROR_INVAL */           EINVAL,
    /* PAL_ERROR_TOOLONG */         ENAMETOOLONG,
    /* PAL_ERROR_DENIED */          EACCES,
    /* PAL_ERROR_BADHANDLE */       EFAULT,
    /* PAL_ERROR_STREAMEXIST */     EEXIST,
    /* PAL_ERROR_STREAMNOTEXIST */  ENOENT,
    /* PAL_ERROR_STREAMISFILE */    ENOTDIR,
    /* PAL_ERROR_STREAMISDIR */     EISDIR,
    /* PAL_ERROR_STREAMISDEVICE */  ESPIPE,
    /* PAL_ERROR_INTERRUPTED */     EINTR,
    /* PAL_ERROR_OVERFLOW */        EFAULT,
    /* PAL_ERROR_BADADDR */         EFAULT,
    /* PAL_ERROR_NOMEM */           ENOMEM,
    /* PAL_ERROR_NOTKILLABLE */     EACCES,
    /* PAL_ERROR_INCONSIST */       EFAULT,
    /* PAL_ERROR_TRYAGAIN */        EAGAIN,
    /* PAL_ERROR_ENDOFSTREAM */     0,
    /* PAL_ERROR_NOTSERVER */       EINVAL,
    /* PAL_ERROR_NOTCONNECTION */   ENOTCONN,
    /* PAL_ERROR_ZEROSIZE */        0,
    /* PAL_ERROR_CONNFAILED */      ECONNRESET,
    /* PAL_ERROR_ADDRNOTEXIST */    EADDRNOTAVAIL,
};
  89. long convert_pal_errno (long err)
  90. {
  91. return (err >= 0 && err <= PAL_ERROR_BOUND) ?
  92. pal_errno_to_unix_errno[err] : 0;
  93. }
  94. unsigned long parse_int (const char * str)
  95. {
  96. unsigned long num = 0;
  97. int radix = 10;
  98. char c;
  99. if (str[0] == '0') {
  100. str++;
  101. radix = 8;
  102. if (str[0] == 'x') {
  103. str++;
  104. radix = 16;
  105. }
  106. }
  107. while ((c = *(str++))) {
  108. int val;
  109. if (c >= 'A' && c <= 'F')
  110. val = c - 'A' + 10;
  111. else if (c >= 'a' && c <= 'f')
  112. val = c - 'a' + 10;
  113. else if (c >= '0' && c <= '9')
  114. val = c - '0';
  115. else
  116. break;
  117. if (val >= radix)
  118. break;
  119. num = num * radix + val;
  120. }
  121. if (c == 'G' || c == 'g')
  122. num *= 1024 * 1024 * 1024;
  123. else if (c == 'M' || c == 'm')
  124. num *= 1024 * 1024;
  125. else if (c == 'K' || c == 'k')
  126. num *= 1024;
  127. return num;
  128. }
/* Look up a glibc tunable by name. Only "heap_size" (manifest key
 * "glibc.heap_size") is recognized; returns the parsed size on success,
 * -ENOENT if the manifest does not define it, -EINVAL for unknown options.
 * (strcmp_static() evaluates true on a match, per its use here.) */
long int glibc_option (const char * opt)
{
    char cfg[CONFIG_MAX];

    if (strcmp_static(opt, "heap_size")) {
        ssize_t ret = get_config(root_config, "glibc.heap_size", cfg, CONFIG_MAX);
        if (ret <= 0) {
            debug("no glibc option: %s (err=%ld)\n", opt, ret);
            return -ENOENT;
        }

        long int heap_size = parse_int(cfg);
        debug("glibc option: heap_size = %ld\n", heap_size);
        return (long int) heap_size;
    }

    return -EINVAL;
}
/* Memory region carried over from the parent's checkpoint during
 * migration, and the address the shim itself was restored at. */
void * migrated_memory_start;
void * migrated_memory_end;
void * migrated_shim_addr;

/* Environment of the initial process; preserved across migration. */
const char ** initial_envp __attribute_migratable;

/* library_paths is populated with LD_LIBRARY_PATH entries once during LibOS
 * initialization (see read_environs()) and is used in __load_interp_object()
 * to search for ELF program interpreter in specific paths. Once allocated,
 * its memory is never freed or updated. */
char ** library_paths = NULL;

/* Global "master" lock and the flag that turns locking on once threading
 * is initialized. */
struct shim_lock __master_lock;
bool lock_enabled;
  155. void init_tcb (shim_tcb_t * tcb)
  156. {
  157. tcb->canary = SHIM_TLS_CANARY;
  158. tcb->self = tcb;
  159. }
/* Duplicate a shim TCB into new_tcb: fresh canary and self pointer, plus
 * the thread pointer, saved context, tid and debug buffer of the source.
 * Everything else is zeroed. */
void copy_tcb (shim_tcb_t * new_tcb, const shim_tcb_t * old_tcb)
{
    memset(new_tcb, 0, sizeof(shim_tcb_t));
    new_tcb->canary = SHIM_TLS_CANARY;
    new_tcb->self = new_tcb;
    new_tcb->tp   = old_tcb->tp;
    memcpy(&new_tcb->context, &old_tcb->context, sizeof(struct shim_context));
    new_tcb->tid = old_tcb->tid;
    new_tcb->debug_buf = old_tcb->debug_buf;
}
/* This function is used to allocate tls before interpreter start running */
/* Install `tcb` as the current TLS (FS segment register). If `thread` is
 * given, link thread and TCB both ways; `user` records whether the TCB
 * lives in user-controlled memory. */
void allocate_tls (__libc_tcb_t * tcb, bool user, struct shim_thread * thread)
{
    assert(tcb);
    tcb->tcb = tcb;     /* glibc-style TCB points to itself */
    init_tcb(&tcb->shim_tcb);

    if (thread) {
        thread->tcb = tcb;
        thread->user_tcb = user;
        tcb->shim_tcb.tp  = thread;
        tcb->shim_tcb.tid = thread->tid;
    } else {
        tcb->shim_tcb.tp  = NULL;
        tcb->shim_tcb.tid = 0;
    }

    DkSegmentRegister(PAL_SEGMENT_FS, tcb);
    assert(shim_tls_check_canary());
}
/* Like allocate_tls(), but instead of a blank TCB this copies the current
 * thread's shim TCB into `tcb` before installing it as the FS segment —
 * used when switching the thread onto a new (e.g. user-provided) TCB. */
void populate_tls (__libc_tcb_t * tcb, bool user)
{
    assert(tcb);
    tcb->tcb = tcb;     /* glibc-style TCB points to itself */
    copy_tcb(&tcb->shim_tcb, shim_get_tls());

    struct shim_thread * thread = (struct shim_thread *) tcb->shim_tcb.tp;
    if (thread) {
        thread->tcb = tcb;
        thread->user_tcb = user;
    }

    DkSegmentRegister(PAL_SEGMENT_FS, tcb);
    assert(shim_tls_check_canary());
}
DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);

#define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)

/* Allocate a stack of `size` bytes preceded by a `protect_size` guard
 * region. User stacks are reserved in the bookkept user heap and backed
 * via the PAL; internal stacks come from system_malloc(). Returns a
 * 16-byte-aligned pointer to the lowest usable byte, or NULL. */
void * allocate_stack (size_t size, size_t protect_size, bool user)
{
    size = ALIGN_UP(size);
    protect_size = ALIGN_UP(protect_size);

    /* preserve a non-readable, non-writable page below the user
       stack to stop user program to clobber other vmas */
    void * stack = NULL;
    int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);

    if (user) {
        /* reserve address space first (PROT_NONE), then back it with pages */
        stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
                                    flags, NULL, 0, "stack");
        if (!stack)
            return NULL;

        stack = (void *)
            DkVirtualMemoryAlloc(stack, size + protect_size,
                                 0, PAL_PROT_NONE);
    } else {
        stack = system_malloc(size + protect_size);
    }

    if (!stack)
        return NULL;

    ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
    INC_PROFILE_OCCURENCE(alloc_stack_count);

    /* skip past the guard region; only [stack, stack+size) becomes R/W */
    stack += protect_size;
    // Ensure proper alignment for process' initial stack pointer value.
    stack += (16 - (uintptr_t)stack % 16) % 16;
    DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);

    /* NOTE(review): on this failure path the memory allocated above is not
     * released — confirm whether that leak is acceptable here */
    if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
        return NULL;

    debug("allocated stack at %p (size = %ld)\n", stack, size);
    return stack;
}
/* Lay out the initial user stack in place: the argc slot, argv/envp
 * pointer arrays and the auxv grow up from the bottom of the work area
 * while the string data they point to grows down from the top. Finally
 * the bottom part is shifted up against the strings and aligned down to
 * 16 bytes. On success *argcpp points at the final argc slot and *argvp /
 * *envpp / *auxpp are rewritten to the relocated copies; returns -ENOMEM
 * when the two cursors would cross. */
static int populate_user_stack (void * stack, size_t stack_size,
                                int nauxv, elf_auxv_t ** auxpp,
                                int ** argcpp,
                                const char *** argvp, const char *** envpp)
{
    const int argc = **argcpp;
    const char ** argv = *argvp, ** envp = *envpp;
    const char ** new_argv = NULL, ** new_envp = NULL;
    elf_auxv_t *new_auxp = NULL;
    void * stack_bottom = stack;
    void * stack_top = stack + stack_size;

/* carve `size` bytes off the top (string data) or the bottom (pointer
 * arrays), failing the whole function when the cursors would cross */
#define ALLOCATE_TOP(size) \
    ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
       stack_top; })

#define ALLOCATE_BOTTOM(size) \
    ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
       stack_bottom - (size); })

    /* ld.so expects argc as long on stack, not int. */
    long * argcp = ALLOCATE_BOTTOM(sizeof(long));
    *argcp = **argcpp;

    if (!argv) {
        *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
        goto copy_envp;
    }

    new_argv = stack_bottom;
    /* first iteration copies argv; after the copy_envp label the same loop
     * body runs once more to copy envp */
    while (argv) {
        for (const char ** a = argv ; *a ; a++) {
            const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
            int len = strlen(*a) + 1;
            char * abuf = ALLOCATE_TOP(len);
            memcpy(abuf, *a, len);
            *t = abuf;
        }

        *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
copy_envp:
        if (!envp)
            break;
        new_envp = stack_bottom;
        argv = envp;
        envp = NULL;
    }

    /* still need a NULL terminator where envp would have been */
    if (!new_envp)
        *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;

    if (nauxv) {
        new_auxp = ALLOCATE_BOTTOM(sizeof(elf_auxv_t) * nauxv);
        if (*auxpp)
            memcpy(new_auxp, *auxpp, nauxv * sizeof(elf_auxv_t));
    }

    /* reserve at least 16 bytes on the stack to accommodate AT_RANDOM bytes
     * later
     */
    ALLOCATE_TOP(16);

    /* x86_64 ABI requires 16 bytes alignment on stack on every function
       call. */
    size_t move_size = stack_bottom - stack;
    *argcpp = stack_top - move_size;
    *argcpp = ALIGN_DOWN_PTR(*argcpp, 16UL);
    **argcpp = argc;
    size_t shift = (void*)(*argcpp) - stack;

    /* slide the bottom area (argc/argv/envp/auxv) up against the strings
     * and relocate the returned pointers by the same shift */
    memmove(*argcpp, stack, move_size);
    *argvp = new_argv ? (void *) new_argv + shift : NULL;
    *envpp = new_envp ? (void *) new_envp + shift : NULL;
    *auxpp = new_auxp ? (void *) new_auxp + shift : NULL;

    /* clear working area at the bottom */
    memset(stack, 0, shift);
    return 0;
}
/* Size of the initial user stack: manifest key "sys.stack.size", falling
 * back to DEFAULT_SYS_STACK_SIZE; cached after the first use. */
unsigned long sys_stack_size = 0;

/* Allocate and populate the initial user stack of the current thread,
 * rewriting argc/argv/envp/auxv to their on-stack copies. No-op if the
 * thread already has a stack (e.g. after a checkpoint restore). */
int init_stack (const char ** argv, const char ** envp,
                int ** argcpp, const char *** argpp,
                int nauxv, elf_auxv_t ** auxpp)
{
    if (!sys_stack_size) {
        sys_stack_size = DEFAULT_SYS_STACK_SIZE;
        if (root_config) {
            char stack_cfg[CONFIG_MAX];
            if (get_config(root_config, "sys.stack.size", stack_cfg,
                           CONFIG_MAX) > 0)
                sys_stack_size = ALIGN_UP(parse_int(stack_cfg));
        }
    }

    struct shim_thread * cur_thread = get_cur_thread();

    if (!cur_thread || cur_thread->stack)
        return 0;

    /* one allocation unit of guard space below the stack */
    void * stack = allocate_stack(sys_stack_size, allocsize, true);
    if (!stack)
        return -ENOMEM;

    /* a migrated process keeps the environment carried in the checkpoint
     * instead of the one handed over by the PAL */
    if (initial_envp)
        envp = initial_envp;

    int ret = populate_user_stack(stack, sys_stack_size,
                                  nauxv, auxpp, argcpp, &argv, &envp);
    if (ret < 0)
        return ret; /* NOTE(review): the freshly allocated stack leaks here */

    *argpp = argv;
    initial_envp = envp;

    cur_thread->stack_top = stack + sys_stack_size;
    cur_thread->stack = stack;
    cur_thread->stack_red = stack - allocsize;
    return 0;
}
/* Scan the environment for variables the shim itself consumes. Currently
 * only LD_LIBRARY_PATH is handled: its colon-separated entries are copied
 * into the global library_paths array (NULL-terminated; intentionally
 * never freed). Returns 0 on success or -ENOMEM. */
int read_environs (const char ** envp)
{
    for (const char ** e = envp ; *e ; e++) {
        if (strpartcmp_static(*e, "LD_LIBRARY_PATH=")) {
            /* populate library_paths with entries from LD_LIBRARY_PATH envvar */
            const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
            size_t npaths = 2; // One for the first entry, one for the last
                               // NULL.
            for (const char * tmp = s ; *tmp ; tmp++)
                if (*tmp == ':')
                    npaths++;
            char** paths = malloc(sizeof(const char *) *
                                  npaths);
            if (!paths)
                return -ENOMEM;

            size_t cnt = 0;
            while (*s) {
                /* [s, next) is the current colon-delimited entry */
                const char * next;
                for (next = s ; *next && *next != ':' ; next++);
                size_t len = next - s;
                char * str = malloc(len + 1);
                if (!str) {
                    /* roll back everything allocated so far */
                    for (size_t i = 0; i < cnt; i++)
                        free(paths[i]);
                    free(paths);
                    return -ENOMEM;
                }
                memcpy(str, s, len);
                str[len] = 0;
                paths[cnt++] = str;
                s = *next ? next + 1 : next;
            }

            paths[cnt] = NULL;

            assert(!library_paths);
            library_paths = paths;
            return 0;
        }
    }
    return 0;
}
/* Root of the parsed manifest; set by init_manifest(). */
struct config_store * root_config = NULL;

/* Thin adapters matching the allocator function-pointer types that
 * struct config_store expects. */
static void * __malloc (size_t size)
{
    return malloc(size);
}

static void __free (void * mem)
{
    free(mem);
}
/* Map the manifest into memory — either the copy the PAL already preloaded
 * or the manifest stream itself — parse it into a config_store and publish
 * it through the global root_config. Returns 0 or negative errno. */
int init_manifest (PAL_HANDLE manifest_handle)
{
    int ret = 0;
    void * addr = NULL;
    size_t size = 0, map_size = 0;

#define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)

    if (PAL_CB(manifest_preload.start)) {
        /* manifest already mapped by the PAL; use it in place */
        addr = PAL_CB(manifest_preload.start);
        size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
    } else {
        /* map the manifest stream ourselves; map_size != 0 marks that the
         * failure path must undo this mapping */
        PAL_STREAM_ATTR attr;
        if (!DkStreamAttributesQueryByHandle(manifest_handle, &attr))
            return -PAL_ERRNO;

        size = attr.pending_size;
        map_size = ALIGN_UP(size);
        addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
                                  NULL, 0, "manifest");
        if (!addr)
            return -ENOMEM;

        void * ret_addr = DkStreamMap(manifest_handle, addr,
                                      PAL_PROT_READ, 0,
                                      ALIGN_UP(size));

        if (!ret_addr) {
            bkeep_munmap(addr, map_size, MAP_FLAGS);
            return -ENOMEM;
        } else {
            assert(addr == ret_addr);
        }
    }

    struct config_store * new_root_config = malloc(sizeof(struct config_store));
    if (!new_root_config) {
        ret = -ENOMEM;
        goto fail;
    }

    new_root_config->raw_data = addr;
    new_root_config->raw_size = size;
    new_root_config->malloc = __malloc;
    new_root_config->free = __free;

    const char * errstring = "Unexpected error";

    if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
        SYS_PRINTF("Unable to read manifest file: %s\n", errstring);
        goto fail;
    }

    root_config = new_root_config;
    return 0;

fail:
    /* unmap only if we mapped the stream ourselves (map_size != 0);
     * free(NULL) is a no-op when allocation was what failed */
    if (map_size) {
        DkStreamUnmap(addr, map_size);
        if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
            BUG();
    }
    free(new_root_config);
    return ret;
}
#ifdef PROFILE
struct shim_profile profile_root;
#endif

/* Locate argv, envp and auxv inside the raw argument block handed over by
 * the PAL: argv starts at `cookie`, envp follows argv's NULL terminator,
 * and auxv follows envp's NULL terminator. */
# define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)    \
    do {                                                        \
        void *_tmp = (cookie);                                  \
        (argv) = _tmp;                                          \
        _tmp += sizeof(char *) * ((argc) + 1);                  \
        (envp) = _tmp;                                          \
        for ( ; *(char **) _tmp; _tmp += sizeof(char *));       \
        (auxp) = _tmp + sizeof(char *);                         \
    } while (0)
  451. static elf_auxv_t* __process_auxv (elf_auxv_t * auxp)
  452. {
  453. elf_auxv_t * av;
  454. for (av = auxp; av->a_type != AT_NULL; av++)
  455. switch (av->a_type) {
  456. default: break;
  457. }
  458. return av + 1;
  459. }
#ifdef PROFILE
/* Parse PROFILE_ENABLED=<name>[,<name>...] from the environment. When the
 * variable is present, all profiles are first disabled and only the listed
 * entries re-enabled; enablement is then propagated down from enabled
 * CATEGORY entries to their children and up from enabled leaves to their
 * ancestor categories. (`profile_` appears to be the root sentinel of the
 * profile tree — confirm against shim_profile.h.) */
static void set_profile_enabled (const char ** envp)
{
    const char ** p;
    for (p = envp ; (*p) ; p++)
        if (strpartcmp_static(*p, "PROFILE_ENABLED="))
            break;
    if (!(*p))
        return;

    for (int i = 0 ; i < N_PROFILE ; i++)
        PROFILES[i].disabled = true;

    const char * str = (*p) + 16;   /* skip "PROFILE_ENABLED=" */
    bool enabled = false;
    while (*str) {
        /* [str, next) is one comma-separated profile name */
        const char * next = str;
        for ( ; (*next) && (*next) != ',' ; next++);
        if (next > str) {
            int len = next - str;
            for (int i = 0 ; i < N_PROFILE ; i++) {
                struct shim_profile * profile = &PROFILES[i];
                if (!memcmp(profile->name, str, len) && !profile->name[len]) {
                    profile->disabled = false;
                    if (profile->type == CATEGORY)
                        enabled = true;
                }
            }
        }
        str = (*next) ? next + 1 : next;
    }

    /* downward propagation: children of an enabled category become enabled;
     * iterate until a fixed point is reached */
    while (enabled) {
        enabled = false;
        for (int i = 0 ; i < N_PROFILE ; i++) {
            struct shim_profile * profile = &PROFILES[i];
            if (!profile->disabled || profile->root == &profile_)
                continue;
            if (!profile->root->disabled) {
                profile->disabled = false;
                if (profile->type == CATEGORY)
                    enabled = true;
            }
        }
    }

    /* upward propagation: each enabled non-category re-enables its chain
     * of ancestor categories */
    for (int i = 0 ; i < N_PROFILE ; i++) {
        struct shim_profile * profile = &PROFILES[i];
        if (profile->type == CATEGORY || profile->disabled)
            continue;
        for (profile = profile->root ;
             profile != &profile_ && profile->disabled ;
             profile = profile->root)
            profile->disabled = false;
    }
}
#endif
/* In a freshly created child: read the newproc header (checkpoint
 * description and timing info) that the parent writes into the process
 * stream. Returns the parent-reported failure code, or -errno when the
 * stream read fails. */
static int init_newproc (struct newproc_header * hdr)
{
    BEGIN_PROFILE_INTERVAL();

    int bytes = DkStreamRead(PAL_CB(parent_process), 0,
                             sizeof(struct newproc_header), hdr,
                             NULL, 0);
    /* NOTE(review): only a zero-byte read counts as failure; a short read
     * would leave hdr partially filled — confirm the PAL guarantees
     * all-or-nothing semantics here */
    if (!bytes)
        return -PAL_ERRNO;

    SAVE_PROFILE_INTERVAL(child_wait_header);
    SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
    return hdr->failure;
}
/* Profile entries for PAL startup phases (filled from pal_control at the
 * end of shim_init) and for each LibOS initialization step. */
DEFINE_PROFILE_CATEGORY(pal, );
DEFINE_PROFILE_INTERVAL(pal_startup_time,               pal);
DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
DEFINE_PROFILE_INTERVAL(pal_relocation_time,            pal);
DEFINE_PROFILE_INTERVAL(pal_linking_time,               pal);
DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time,      pal);
DEFINE_PROFILE_INTERVAL(pal_allocation_time,            pal);
DEFINE_PROFILE_INTERVAL(pal_tail_startup_time,          pal);
DEFINE_PROFILE_INTERVAL(pal_child_creation_time,        pal);

DEFINE_PROFILE_CATEGORY(init, );
DEFINE_PROFILE_INTERVAL(init_vma,                  init);
DEFINE_PROFILE_INTERVAL(init_slab,                 init);
DEFINE_PROFILE_INTERVAL(init_str_mgr,              init);
DEFINE_PROFILE_INTERVAL(init_internal_map,         init);
DEFINE_PROFILE_INTERVAL(init_fs,                   init);
DEFINE_PROFILE_INTERVAL(init_dcache,               init);
DEFINE_PROFILE_INTERVAL(init_handle,               init);
DEFINE_PROFILE_INTERVAL(read_from_checkpoint,      init);
DEFINE_PROFILE_INTERVAL(read_from_file,            init);
DEFINE_PROFILE_INTERVAL(init_newproc,              init);
DEFINE_PROFILE_INTERVAL(init_mount_root,           init);
DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file, init);
DEFINE_PROFILE_INTERVAL(restore_from_file,         init);
DEFINE_PROFILE_INTERVAL(init_manifest,             init);
DEFINE_PROFILE_INTERVAL(init_ipc,                  init);
DEFINE_PROFILE_INTERVAL(init_thread,               init);
DEFINE_PROFILE_INTERVAL(init_important_handles,    init);
DEFINE_PROFILE_INTERVAL(init_mount,                init);
DEFINE_PROFILE_INTERVAL(init_async,                init);
DEFINE_PROFILE_INTERVAL(init_stack,                init);
DEFINE_PROFILE_INTERVAL(read_environs,             init);
DEFINE_PROFILE_INTERVAL(init_loader,               init);
DEFINE_PROFILE_INTERVAL(init_ipc_helper,           init);
DEFINE_PROFILE_INTERVAL(init_signal,               init);

#define CALL_INIT(func, args ...) func(args)

/* Run one init_*() step of shim_init(): terminate the process if the step
 * fails, otherwise charge the elapsed time to the profile entry of the
 * same name. */
#define RUN_INIT(func, ...)                                         \
    do {                                                            \
        int _err = CALL_INIT(func, ##__VA_ARGS__);                  \
        if (_err < 0) {                                             \
            SYS_PRINTF("shim_init() in " #func " (%d)\n", _err);    \
            shim_terminate(_err);                                   \
        }                                                           \
        SAVE_PROFILE_INTERVAL(func);                                \
    } while (0)

/* Signaled at the end of shim_init() to let waiting threads start. */
extern PAL_HANDLE thread_start_event;
/* Entry point of the library OS, called by the PAL with the raw argument
 * block (argv/envp/auxv laid out after argc) on the initial stack. Sets up
 * the initial TCB, restores a checkpoint when resuming or migrating, runs
 * every init_*() subsystem in order, acknowledges the parent process and
 * finally transfers control to the application. Never returns. */
noreturn void* shim_init (int argc, void * args)
{
    debug_handle = PAL_CB(debug_stream);
    cur_process.vmid = (IDTYPE) PAL_CB(process_id);

    /* create the initial TCB, shim can not be run without a tcb */
    __libc_tcb_t tcb;
    memset(&tcb, 0, sizeof(__libc_tcb_t));
    allocate_tls(&tcb, false, NULL);
    __disable_preempt(&tcb.shim_tcb); // Temporarily disable preemption for delaying any signal
                                      // that arrives during initialization
    debug_setbuf(&tcb.shim_tcb, true);
    debug("set tcb to %p\n", &tcb);

#ifdef PROFILE
    unsigned long begin_time = GET_PROFILE_INTERVAL();
#endif

    debug("host: %s\n", PAL_CB(host_type));

    /* route PAL failure events into the per-thread pal_errno */
    DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE);

    /* cache the PAL allocation alignment and its derived shift/mask */
    allocsize = PAL_CB(alloc_align);
    allocshift = allocsize - 1;
    allocmask = ~allocshift;

    create_lock(&__master_lock);

    int * argcp = &argc;
    const char ** argv, ** envp, ** argp = NULL;
    elf_auxv_t * auxp;

    /* call to figure out where the arguments are */
    FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
    int nauxv = __process_auxv(auxp) - auxp;

#ifdef PROFILE
    set_profile_enabled(envp);
#endif

    struct newproc_header hdr;
    void * cpaddr = NULL;
#ifdef PROFILE
    unsigned long begin_create_time = 0;
#endif

    BEGIN_PROFILE_INTERVAL();
    RUN_INIT(init_vma);
    RUN_INIT(init_slab);
    RUN_INIT(read_environs, envp);
    RUN_INIT(init_str_mgr);
    RUN_INIT(init_internal_map);
    RUN_INIT(init_fs);
    RUN_INIT(init_dcache);
    RUN_INIT(init_handle);

    debug("shim loaded at %p, ready to initialize\n", &__load_address);

    /* "-resume <file>": restore the process image from a checkpoint file
     * instead of starting fresh */
    if (argc && argv[0][0] == '-') {
        if (strcmp_static(argv[0], "-resume") && argc >= 2) {
            const char * filename = *(argv + 1);
            argc -= 2;
            argv += 2;
            RUN_INIT(init_mount_root);
            RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
                     &cpaddr);
            goto restore;
        }
    }

    /* child of a Graphene process: receive the checkpoint header from the
     * parent over the process stream and migrate its state */
    if (PAL_CB(parent_process)) {
        RUN_INIT(init_newproc, &hdr);
        SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
                                  hdr.create_time, begin_time);
#ifdef PROFILE
        begin_create_time = hdr.begin_create_time;
#endif

        if (hdr.checkpoint.hdr.size)
            RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
    }

    if (cpaddr) {
restore:
        thread_start_event = DkNotificationEventCreate(PAL_FALSE);
        RUN_INIT(restore_checkpoint,
                 &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
                 (ptr_t) cpaddr, 0);
    }

    if (PAL_CB(manifest_handle))
        RUN_INIT(init_manifest, PAL_CB(manifest_handle));

    RUN_INIT(init_mount_root);
    RUN_INIT(init_ipc);
    RUN_INIT(init_thread);
    RUN_INIT(init_mount);
    RUN_INIT(init_important_handles);
    RUN_INIT(init_async);
    RUN_INIT(init_stack, argv, envp, &argcp, &argp, nauxv, &auxp);
    RUN_INIT(init_loader);
    RUN_INIT(init_ipc_helper);
    RUN_INIT(init_signal);

    if (PAL_CB(parent_process)) {
        /* Notify the parent process */
        struct newproc_response res;
        res.child_vmid = cur_process.vmid;
        res.failure = 0;
        if (!DkStreamWrite(PAL_CB(parent_process), 0,
                           sizeof(struct newproc_response),
                           &res, NULL))
            shim_do_exit(-PAL_ERRNO);
    }

    debug("shim process initialized\n");

#ifdef PROFILE
    if (begin_create_time)
        SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
                                    begin_create_time);
#endif

    /* import the PAL's own startup timings into the profile tree */
    SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
                              pal_control.host_specific_startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
                              pal_control.relocation_time);
    SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
    SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
                              pal_control.manifest_loading_time);
    SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
                              pal_control.allocation_time);
    SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
                              pal_control.tail_startup_time);
    SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
                              pal_control.child_creation_time);

    /* let any threads waiting on initialization proceed */
    if (thread_start_event)
        DkEventSet(thread_start_event);

    shim_tcb_t * cur_tcb = shim_get_tls();
    struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;

    /* a restored thread resumes its saved context; otherwise start
     * executing the application's ELF object */
    if (cur_tcb->context.sp)
        restore_context(&cur_tcb->context);

    if (cur_thread->exec)
        execute_elf_object(cur_thread->exec,
                           argcp, argp, nauxv, auxp);
    shim_do_exit(0);
}
/* Generic retry loop for creating a uniquely named object:
 *   mkname - generates a candidate name into `name` from `id`
 *   create - attempts creation; <0 = hard error, >0 = name collision
 *            (retry with a new name), 0 = success
 *   output - optional post-processing of the final name; when given it
 *            replaces the default qstr handling below
 * Returns the name length (or output()'s result) on success, or a
 * negative error code. */
static int create_unique (int (*mkname) (char *, size_t, void *),
                          int (*create) (const char *, void *),
                          int (*output) (char *, size_t, const void *,
                                         struct shim_qstr *),
                          char * name, size_t size, void * id, void * obj,
                          struct shim_qstr * qstr)
{
    int ret, len;
    while (1) {
        len = mkname(name, size, id);
        if (len < 0)
            return len;
        if ((ret = create(name, obj)) < 0)
            return ret;
        if (ret)
            continue;   /* name already taken; generate another */
        if (output)
            return output(name, size, id, qstr);
        if (qstr)
            qstrsetstr(qstr, name, len);
        return len;
    }
}
  719. static int name_pipe (char * uri, size_t size, void * id)
  720. {
  721. IDTYPE pipeid;
  722. int len;
  723. int ret = DkRandomBitsRead(&pipeid, sizeof(pipeid));
  724. if (ret < 0)
  725. return -convert_pal_errno(-ret);
  726. debug("creating pipe: pipe.srv:%u\n", pipeid);
  727. if ((len = snprintf(uri, size, "pipe.srv:%u", pipeid)) == size)
  728. return -ERANGE;
  729. *((IDTYPE *) id) = pipeid;
  730. return len;
  731. }
/* create-callback for create_pipe(): try to open (create) the pipe server
 * stream. Returns 1 when the name is already taken (caller retries with a
 * new id), 0 on success — storing the handle in *obj or closing it when
 * unused — and -errno otherwise. */
static int open_pipe (const char * uri, void * obj)
{
    PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
    if (!pipe)
        return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
               -PAL_ERRNO;

    if (obj)
        *((PAL_HANDLE *) obj) = pipe;
    else
        DkObjectClose(pipe);
    return 0;
}
  744. static int pipe_addr (char * uri, size_t size, const void * id,
  745. struct shim_qstr * qstr)
  746. {
  747. IDTYPE pipeid = *((IDTYPE *) id);
  748. int len;
  749. if ((len = snprintf(uri, size, "pipe:%u", pipeid)) == size)
  750. return -ERANGE;
  751. if (qstr)
  752. qstrsetstr(qstr, uri, len);
  753. return len;
  754. }
/* Create a pipe with a unique random id. On success `uri` holds the
 * client address, *hdl the PAL server handle, *id (if non-NULL) the pipe
 * id, and *qstr the recorded address; returns the uri length, or a
 * negative error code. */
int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
                 struct shim_qstr * qstr)
{
    IDTYPE pipeid;
    int ret = create_unique(&name_pipe, &open_pipe, &pipe_addr,
                            uri, size, &pipeid, hdl, qstr);
    if (ret > 0 && id)
        *id = pipeid;
    return ret;
}
  765. static int name_path (char * path, size_t size, void * id)
  766. {
  767. unsigned int suffix;
  768. int prefix_len = strlen(path);
  769. int len;
  770. int ret = DkRandomBitsRead(&suffix, sizeof(suffix));
  771. if (ret < 0)
  772. return -convert_pal_errno(-ret);
  773. len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
  774. if (len == size)
  775. return -ERANGE;
  776. *((unsigned int *) id) = suffix;
  777. return prefix_len + len;
  778. }
  779. static int open_dir (const char * path, void * obj)
  780. {
  781. struct shim_handle * dir = NULL;
  782. if (obj) {
  783. dir = get_new_handle();
  784. if (!dir)
  785. return -ENOMEM;
  786. }
  787. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  788. NULL);
  789. if (ret < 0)
  790. return ret = -EEXIST ? 1 : ret;
  791. if (obj)
  792. *((struct shim_handle **) obj) = dir;
  793. return 0;
  794. }
  795. static int open_file (const char * path, void * obj)
  796. {
  797. struct shim_handle * file = NULL;
  798. if (obj) {
  799. file = get_new_handle();
  800. if (!file)
  801. return -ENOMEM;
  802. }
  803. int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
  804. NULL);
  805. if (ret < 0)
  806. return ret = -EEXIST ? 1 : ret;
  807. if (obj)
  808. *((struct shim_handle **) obj) = file;
  809. return 0;
  810. }
  811. static int open_pal_handle (const char * uri, void * obj)
  812. {
  813. PAL_HANDLE hdl;
  814. if (strpartcmp_static(uri, "dev:"))
  815. hdl = DkStreamOpen(uri, 0,
  816. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  817. PAL_SHARE_OWNER_R,
  818. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  819. 0);
  820. else
  821. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  822. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  823. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  824. 0);
  825. if (!hdl) {
  826. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  827. return 0;
  828. else
  829. return -PAL_ERRNO;
  830. }
  831. if (obj) {
  832. *((PAL_HANDLE *) obj) = hdl;
  833. } else {
  834. DkObjectClose(hdl);
  835. }
  836. return 0;
  837. }
  838. static int output_path (char * path, size_t size, const void * id,
  839. struct shim_qstr * qstr)
  840. {
  841. int len = strlen(path);
  842. if (qstr)
  843. qstrsetstr(qstr, path, len);
  844. return len;
  845. }
  846. int create_dir (const char * prefix, char * path, size_t size,
  847. struct shim_handle ** hdl)
  848. {
  849. unsigned int suffix;
  850. if (prefix) {
  851. int len = strlen(prefix);
  852. if (len >= size)
  853. return -ERANGE;
  854. memcpy(path, prefix, len + 1);
  855. }
  856. return create_unique(&name_path, &open_dir, &output_path, path, size,
  857. &suffix, hdl, NULL);
  858. }
  859. int create_file (const char * prefix, char * path, size_t size,
  860. struct shim_handle ** hdl)
  861. {
  862. unsigned int suffix;
  863. if (prefix) {
  864. int len = strlen(prefix);
  865. if (len >= size)
  866. return -ERANGE;
  867. memcpy(path, prefix, len + 1);
  868. }
  869. return create_unique(&name_path, &open_file, &output_path, path, size,
  870. &suffix, hdl, NULL);
  871. }
  872. int create_handle (const char * prefix, char * uri, size_t size,
  873. PAL_HANDLE * hdl, unsigned int * id)
  874. {
  875. unsigned int suffix;
  876. if (prefix) {
  877. int len = strlen(prefix);
  878. if (len >= size)
  879. return -ERANGE;
  880. memcpy(uri, prefix, len + 1);
  881. }
  882. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  883. id ? : &suffix, hdl, NULL);
  884. }
/* Debug hook: sanity-check the current stack pointer against the stack
 * bounds recorded in the current thread (stack .. stack_top).  Prints a
 * warning when less than one page of stack remains, or when RSP lies
 * outside the thread's stack entirely.  x86-64 only (reads %rsp). */
void check_stack_hook (void)
{
    struct shim_thread * cur_thread = get_cur_thread();
    void * rsp;
    /* capture the caller's stack pointer directly from the register */
    __asm__ volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
    if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
        /* inside the recorded stack: warn when headroom drops below one page */
        if (rsp - cur_thread->stack < PAL_CB(pagesize))
            SYS_PRINTF("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
                       rsp, cur_thread->stack, cur_thread->stack_top);
    } else {
        /* RSP is not within [stack, stack_top]: running on a foreign stack */
        SYS_PRINTF("*** context dismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
                   rsp, cur_thread->stack, cur_thread->stack_top);
    }
}
  899. #ifdef PROFILE
  900. static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
  901. int level)
  902. {
  903. unsigned long total_interval_time = 0;
  904. unsigned long total_interval_count = 0;
  905. for (int i = 0 ; i < N_PROFILE ; i++) {
  906. struct shim_profile * profile = &PROFILES[i];
  907. if (profile->root != root || profile->disabled)
  908. continue;
  909. switch (profile->type) {
  910. case OCCURENCE: {
  911. unsigned int count =
  912. atomic_read(&profile->val.occurence.count);
  913. if (count) {
  914. for (int j = 0 ; j < level ; j++)
  915. __SYS_FPRINTF(hdl, " ");
  916. __SYS_FPRINTF(hdl, "- %s: %u times\n", profile->name, count);
  917. }
  918. break;
  919. }
  920. case INTERVAL: {
  921. unsigned int count =
  922. atomic_read(&profile->val.interval.count);
  923. if (count) {
  924. unsigned long time =
  925. atomic_read(&profile->val.interval.time);
  926. unsigned long ind_time = time / count;
  927. total_interval_time += time;
  928. total_interval_count += count;
  929. for (int j = 0 ; j < level ; j++)
  930. __SYS_FPRINTF(hdl, " ");
  931. __SYS_FPRINTF(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
  932. time, profile->name, count, ind_time);
  933. }
  934. break;
  935. }
  936. case CATEGORY:
  937. for (int j = 0 ; j < level ; j++)
  938. __SYS_FPRINTF(hdl, " ");
  939. __SYS_FPRINTF(hdl, "- %s:\n", profile->name);
  940. print_profile_result(hdl, profile, level + 1);
  941. break;
  942. }
  943. }
  944. if (total_interval_count) {
  945. __SYS_FPRINTF(hdl, " - (%11.11u) total: %u times, %lu msec\n",
  946. total_interval_time, total_interval_count,
  947. total_interval_time / total_interval_count);
  948. }
  949. }
  950. #endif /* PROFILE */
  951. static struct atomic_int in_terminate = { .counter = 0, };
  952. noreturn void shim_terminate (int err)
  953. {
  954. debug("teminating the whole process (%d)\n", err);
  955. /* do last clean-up of the process */
  956. shim_clean(err);
  957. DkProcessExit(err);
  958. }
  959. /* cleanup and terminate process, preserve exit code if err == 0 */
  960. int shim_clean (int err)
  961. {
  962. /* preventing multiple cleanup, this is mostly caused by
  963. assertion in shim_clean */
  964. atomic_inc(&in_terminate);
  965. if (atomic_read(&in_terminate) > 1)
  966. return 0;
  967. if (err != 0)
  968. cur_process.exit_code = err;
  969. store_all_msg_persist();
  970. #ifdef PROFILE
  971. if (ENTER_TIME) {
  972. switch (shim_get_tls()->context.syscall_nr) {
  973. case __NR_exit_group:
  974. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  975. break;
  976. case __NR_exit:
  977. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  978. break;
  979. }
  980. }
  981. if (ipc_cld_profile_send()) {
  982. MASTER_LOCK();
  983. PAL_HANDLE hdl = __open_shim_stdio();
  984. if (hdl) {
  985. __SYS_FPRINTF(hdl, "******************************\n");
  986. __SYS_FPRINTF(hdl, "profiling:\n");
  987. print_profile_result(hdl, &profile_root, 0);
  988. __SYS_FPRINTF(hdl, "******************************\n");
  989. }
  990. MASTER_UNLOCK();
  991. DkObjectClose(hdl);
  992. }
  993. #endif
  994. del_all_ipc_ports(0);
  995. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  996. DkObjectClose(shim_stdio);
  997. shim_stdio = NULL;
  998. debug("process %u exited with status %d\n", cur_process.vmid & 0xFFFF, cur_process.exit_code);
  999. MASTER_LOCK();
  1000. DkProcessExit(cur_process.exit_code);
  1001. return 0;
  1002. }
  1003. int message_confirm (const char * message, const char * options)
  1004. {
  1005. char answer;
  1006. int noptions = strlen(options);
  1007. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  1008. int ret = 0;
  1009. *(str++) = ' ';
  1010. *(str++) = '[';
  1011. for (int i = 0 ; i < noptions ; i++) {
  1012. *(str++) = options[i];
  1013. *(str++) = '/';
  1014. }
  1015. str--;
  1016. *(str++) = ']';
  1017. *(str++) = ' ';
  1018. MASTER_LOCK();
  1019. PAL_HANDLE hdl = __open_shim_stdio();
  1020. if (!hdl) {
  1021. MASTER_UNLOCK();
  1022. return -EACCES;
  1023. }
  1024. #define WRITE(buf, len) \
  1025. ({ int _ret = DkStreamWrite(hdl, 0, len, (void*)(buf), NULL); \
  1026. _ret ? : -PAL_ERRNO; })
  1027. #define READ(buf, len) \
  1028. ({ int _ret = DkStreamRead(hdl, 0, len, buf, NULL, 0); \
  1029. _ret ? : -PAL_ERRNO; })
  1030. if ((ret = WRITE(message, strlen(message))) < 0)
  1031. goto out;
  1032. if ((ret = WRITE(option_str, noptions * 2 + 3)) < 0)
  1033. goto out;
  1034. if ((ret = READ(&answer, 1)) < 0)
  1035. goto out;
  1036. out:
  1037. DkObjectClose(hdl);
  1038. MASTER_UNLOCK();
  1039. return (ret < 0) ? ret : answer;
  1040. }