shim_init.c

  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*!
  14. * \file shim_init.c
  15. *
  16. * This file contains the entry and exit functions of the library OS.
  17. */
  18. #include <shim_defs.h>
  19. #include <shim_internal.h>
  20. #include <shim_table.h>
  21. #include <shim_tcb.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_vma.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_fs.h>
  27. #include <shim_ipc.h>
  28. #include <shim_profile.h>
  29. #include <shim_vdso.h>
  30. #include <pal.h>
  31. #include <pal_debug.h>
  32. #include <pal_error.h>
  33. #include <sys/mman.h>
  34. #include <asm/unistd.h>
  35. #include <asm/fcntl.h>
  36. static_assert(sizeof(shim_tcb_t) <= PAL_LIBOS_TCB_SIZE,
  37. "shim_tcb_t does not fit into PAL_TCB; "
  38. "please increase PAL_LIBOS_TCB_SIZE");
  39. size_t g_pal_alloc_align;
  40. /* The following constants help match the glibc version with compatible
  41. SHIM libraries */
  42. #include "glibc-version.h"
  43. const unsigned int glibc_version = GLIBC_VERSION;
  44. static void handle_failure (PAL_PTR event, PAL_NUM arg, PAL_CONTEXT * context)
  45. {
  46. __UNUSED(event);
  47. __UNUSED(context);
  48. if ((arg <= PAL_ERROR_NATIVE_COUNT) || (arg >= PAL_ERROR_CRYPTO_START &&
  49. arg <= PAL_ERROR_CRYPTO_END))
  50. shim_get_tcb()->pal_errno = arg;
  51. else
  52. shim_get_tcb()->pal_errno = PAL_ERROR_DENIED;
  53. }
  54. noreturn void __abort(void) {
  55. PAUSE();
  56. shim_clean_and_exit(-ENOTRECOVERABLE);
  57. }
  58. void warn (const char *format, ...)
  59. {
  60. va_list args;
  61. va_start (args, format);
  62. __SYS_VPRINTF(format, args);
  63. va_end (args);
  64. }
  65. void __stack_chk_fail (void)
  66. {
  67. }
  68. static int pal_errno_to_unix_errno [PAL_ERROR_NATIVE_COUNT + 1] = {
  69. /* reserved */ 0,
  70. /* PAL_ERROR_NOTIMPLEMENTED */ ENOSYS,
  71. /* PAL_ERROR_NOTDEFINED */ ENOSYS,
  72. /* PAL_ERROR_NOTSUPPORT */ EACCES,
  73. /* PAL_ERROR_INVAL */ EINVAL,
  74. /* PAL_ERROR_TOOLONG */ ENAMETOOLONG,
  75. /* PAL_ERROR_DENIED */ EACCES,
  76. /* PAL_ERROR_BADHANDLE */ EFAULT,
  77. /* PAL_ERROR_STREAMEXIST */ EEXIST,
  78. /* PAL_ERROR_STREAMNOTEXIST */ ENOENT,
  79. /* PAL_ERROR_STREAMISFILE */ ENOTDIR,
  80. /* PAL_ERROR_STREAMISDIR */ EISDIR,
  81. /* PAL_ERROR_STREAMISDEVICE */ ESPIPE,
  82. /* PAL_ERROR_INTERRUPTED */ EINTR,
  83. /* PAL_ERROR_OVERFLOW */ EFAULT,
  84. /* PAL_ERROR_BADADDR */ EFAULT,
  85. /* PAL_ERROR_NOMEM */ ENOMEM,
  86. /* PAL_ERROR_NOTKILLABLE */ EACCES,
  87. /* PAL_ERROR_INCONSIST */ EFAULT,
  88. /* PAL_ERROR_TRYAGAIN */ EAGAIN,
  89. /* PAL_ERROR_ENDOFSTREAM */ 0,
  90. /* PAL_ERROR_NOTSERVER */ EINVAL,
  91. /* PAL_ERROR_NOTCONNECTION */ ENOTCONN,
  92. /* PAL_ERROR_CONNFAILED */ ECONNRESET,
  93. /* PAL_ERROR_ADDRNOTEXIST */ EADDRNOTAVAIL,
  94. /* PAL_ERROR_AFNOSUPPORT */ EAFNOSUPPORT,
  95. };
  96. long convert_pal_errno (long err)
  97. {
  98. return (err >= 0 && err <= PAL_ERROR_NATIVE_COUNT) ?
  99. pal_errno_to_unix_errno[err] : EACCES;
  100. }
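/* Illustrative behaviour of the mapping above: convert_pal_errno(PAL_ERROR_STREAMNOTEXIST)
 * yields ENOENT, while any value outside [0, PAL_ERROR_NATIVE_COUNT] falls back to EACCES. */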
  101. /*!
  102. * \brief Parse a number into an unsigned long.
  103. *
  104. * \param str A string containing a non-negative number.
  105. *
  106. * By default the number is parsed as decimal; if it starts with 0x it is
  107. * parsed as hexadecimal, and if it otherwise starts with 0, it is parsed as
  108. * octal. A trailing K, M, or G suffix scales the result by 1024, 1024^2, or 1024^3.
  109. */
  110. unsigned long parse_int (const char * str)
  111. {
  112. unsigned long num = 0;
  113. int radix = 10;
  114. char c;
  115. if (str[0] == '0') {
  116. str++;
  117. radix = 8;
  118. if (str[0] == 'x') {
  119. str++;
  120. radix = 16;
  121. }
  122. }
  123. while ((c = *(str++))) {
  124. int val;
  125. if (c >= 'A' && c <= 'F')
  126. val = c - 'A' + 10;
  127. else if (c >= 'a' && c <= 'f')
  128. val = c - 'a' + 10;
  129. else if (c >= '0' && c <= '9')
  130. val = c - '0';
  131. else
  132. break;
  133. if (val >= radix)
  134. break;
  135. num = num * radix + val;
  136. }
  137. if (c == 'G' || c == 'g')
  138. num *= 1024 * 1024 * 1024;
  139. else if (c == 'M' || c == 'm')
  140. num *= 1024 * 1024;
  141. else if (c == 'K' || c == 'k')
  142. num *= 1024;
  143. return num;
  144. }
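/* Illustrative results, following the parsing rules above (decimal by default,
 * 0x prefix for hexadecimal, leading 0 for octal, optional K/M/G scaling suffix):
 *
 *   parse_int("123")  == 123
 *   parse_int("0x10") == 16
 *   parse_int("010")  == 8
 *   parse_int("4K")   == 4096
 */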
  145. long int glibc_option (const char * opt)
  146. {
  147. char cfg[CONFIG_MAX];
  148. if (!strcmp_static(opt, "heap_size")) {
  149. ssize_t ret = get_config(root_config, "glibc.heap_size", cfg, sizeof(cfg));
  150. if (ret <= 0) {
  151. debug("no glibc option: %s (err=%ld)\n", opt, ret);
  152. return -ENOENT;
  153. }
  154. long int heap_size = parse_int(cfg);
  155. debug("glibc option: heap_size = %ld\n", heap_size);
  156. return (long int) heap_size;
  157. }
  158. return -EINVAL;
  159. }
  160. void * migrated_memory_start;
  161. void * migrated_memory_end;
  162. const char ** initial_envp __attribute_migratable;
  163. /* library_paths is populated with LD_LIBRARY_PATH entries once during LibOS
  164. * initialization (see read_environs()) and is used in __load_interp_object() to
  165. * search for the ELF program interpreter in specific paths. Once allocated, its
  166. * memory is never freed or updated. */
  167. char ** library_paths = NULL;
  168. struct shim_lock __master_lock;
  169. bool lock_enabled;
  170. void update_fs_base (unsigned long fs_base)
  171. {
  172. shim_tcb_t * shim_tcb = shim_get_tcb();
  173. shim_tcb->context.fs_base = fs_base;
  174. DkSegmentRegister(PAL_SEGMENT_FS, (PAL_PTR)fs_base);
  175. assert(shim_tcb_check_canary());
  176. }
  177. DEFINE_PROFILE_OCCURENCE(alloc_stack, memory);
  178. DEFINE_PROFILE_OCCURENCE(alloc_stack_count, memory);
  179. #define STACK_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
  180. void * allocate_stack (size_t size, size_t protect_size, bool user)
  181. {
  182. size = ALLOC_ALIGN_UP(size);
  183. protect_size = ALLOC_ALIGN_UP(protect_size);
  184. /* preserve a non-readable, non-writable page below the user
  185. stack to stop the user program from clobbering other VMAs */
  186. void * stack = NULL;
  187. int flags = STACK_FLAGS|(user ? 0 : VMA_INTERNAL);
  188. if (user) {
  189. stack = bkeep_unmapped_heap(size + protect_size, PROT_NONE,
  190. flags, NULL, 0, "stack");
  191. if (!stack)
  192. return NULL;
  193. stack = (void *)
  194. DkVirtualMemoryAlloc(stack, size + protect_size,
  195. 0, PAL_PROT_NONE);
  196. } else {
  197. stack = system_malloc(size + protect_size);
  198. }
  199. if (!stack)
  200. return NULL;
  201. ADD_PROFILE_OCCURENCE(alloc_stack, size + protect_size);
  202. INC_PROFILE_OCCURENCE(alloc_stack_count);
  203. stack += protect_size;
  204. // Ensure proper alignment for process' initial stack pointer value.
  205. stack = ALIGN_UP_PTR(stack, 16);
  206. DkVirtualMemoryProtect(stack, size, PAL_PROT_READ|PAL_PROT_WRITE);
  207. if (bkeep_mprotect(stack, size, PROT_READ|PROT_WRITE, flags) < 0)
  208. return NULL;
  209. debug("allocated stack at %p (size = %ld)\n", stack, size);
  210. return stack;
  211. }
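/* Sketch of the layout produced by allocate_stack() for a user stack
 * (a guard area sits below the returned, 16-byte-aligned pointer):
 *
 *        high addresses   +----------------------------------+
 *                         |   size bytes, PROT_READ|WRITE    |  usable stack
 *     returned pointer -> +----------------------------------+
 *                         | >= protect_size bytes, PROT_NONE |  guard area
 *        low addresses    +----------------------------------+
 */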
  212. static int populate_user_stack (void * stack, size_t stack_size,
  213. elf_auxv_t ** auxpp, int ** argcpp,
  214. const char *** argvp, const char *** envpp)
  215. {
  216. const int argc = **argcpp;
  217. const char ** argv = *argvp, ** envp = *envpp;
  218. const char ** new_argv = NULL, ** new_envp = NULL;
  219. elf_auxv_t *new_auxp = NULL;
  220. void * stack_bottom = stack;
  221. void * stack_top = stack + stack_size;
  222. #define ALLOCATE_TOP(size) \
  223. ({ if ((stack_top -= (size)) < stack_bottom) return -ENOMEM; \
  224. stack_top; })
  225. #define ALLOCATE_BOTTOM(size) \
  226. ({ if ((stack_bottom += (size)) > stack_top) return -ENOMEM; \
  227. stack_bottom - (size); })
  228. /* ld.so expects argc as long on stack, not int. */
  229. long * argcp = ALLOCATE_BOTTOM(sizeof(long));
  230. *argcp = **argcpp;
  231. if (!argv) {
  232. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  233. goto copy_envp;
  234. }
  235. new_argv = stack_bottom;
  236. while (argv) {
  237. /* Even though the SysV ABI does not specify the order of argv strings,
  238. some applications (notably Node.js's libuv) assume the compact
  239. encoding of argv where (1) all strings are located adjacently and
  240. (2) in increasing order. */
  241. int argv_size = 0;
  242. for (const char ** a = argv ; *a ; a++)
  243. argv_size += strlen(*a) + 1;
  244. char * argv_bottom = ALLOCATE_TOP(argv_size);
  245. for (const char ** a = argv ; *a ; a++) {
  246. const char ** t = ALLOCATE_BOTTOM(sizeof(const char *));
  247. int len = strlen(*a) + 1;
  248. char * abuf = argv_bottom;
  249. argv_bottom += len;
  250. memcpy(abuf, *a, len);
  251. *t = abuf;
  252. }
  253. *((const char **) ALLOCATE_BOTTOM(sizeof(const char *))) = NULL;
  254. copy_envp:
  255. if (!envp)
  256. break;
  257. new_envp = stack_bottom;
  258. argv = envp;
  259. envp = NULL;
  260. }
  261. if (!new_envp)
  262. *(const char **) ALLOCATE_BOTTOM(sizeof(const char *)) = NULL;
  263. /* reserve space for ELF aux vectors, populated later by LibOS */
  264. new_auxp = ALLOCATE_BOTTOM(REQUIRED_ELF_AUXV * sizeof(elf_auxv_t) +
  265. REQUIRED_ELF_AUXV_SPACE);
  266. /* The x86_64 ABI requires 16-byte stack alignment at every function
  267. call. */
  268. size_t move_size = stack_bottom - stack;
  269. *argcpp = stack_top - move_size;
  270. *argcpp = ALIGN_DOWN_PTR(*argcpp, 16UL);
  271. **argcpp = argc;
  272. size_t shift = (void*)(*argcpp) - stack;
  273. memmove(*argcpp, stack, move_size);
  274. *argvp = new_argv ? (void *) new_argv + shift : NULL;
  275. *envpp = new_envp ? (void *) new_envp + shift : NULL;
  276. *auxpp = new_auxp ? (void *) new_auxp + shift : NULL;
  277. /* clear working area at the bottom */
  278. memset(stack, 0, shift);
  279. return 0;
  280. }
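/* After populate_user_stack() the initial stack (starting at *argcpp and growing
 * upward) holds, in order: argc stored as a long, the argv pointer array plus a
 * NULL terminator, the envp pointer array plus a NULL terminator, space reserved
 * for the ELF auxiliary vector, and finally the argv/envp string data near the
 * top of the stack, matching the process-entry layout that ld.so expects. */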
  281. int init_stack (const char ** argv, const char ** envp,
  282. int ** argcpp, const char *** argpp,
  283. elf_auxv_t ** auxpp)
  284. {
  285. uint64_t stack_size = get_rlimit_cur(RLIMIT_STACK);
  286. if (root_config) {
  287. char stack_cfg[CONFIG_MAX];
  288. if (get_config(root_config, "sys.stack.size", stack_cfg, sizeof(stack_cfg)) > 0) {
  289. stack_size = ALLOC_ALIGN_UP(parse_int(stack_cfg));
  290. set_rlimit_cur(RLIMIT_STACK, stack_size);
  291. }
  292. }
  293. struct shim_thread * cur_thread = get_cur_thread();
  294. if (!cur_thread || cur_thread->stack)
  295. return 0;
  296. void * stack = allocate_stack(stack_size, g_pal_alloc_align, true);
  297. if (!stack)
  298. return -ENOMEM;
  299. if (initial_envp)
  300. envp = initial_envp;
  301. int ret = populate_user_stack(stack, stack_size,
  302. auxpp, argcpp, &argv, &envp);
  303. if (ret < 0)
  304. return ret;
  305. *argpp = argv;
  306. initial_envp = envp;
  307. cur_thread->stack_top = stack + stack_size;
  308. cur_thread->stack = stack;
  309. cur_thread->stack_red = stack - g_pal_alloc_align;
  310. return 0;
  311. }
  312. int read_environs (const char ** envp)
  313. {
  314. for (const char ** e = envp ; *e ; e++) {
  315. if (strstartswith_static(*e, "LD_LIBRARY_PATH=")) {
  316. /* populate library_paths with entries from LD_LIBRARY_PATH envvar */
  317. const char * s = *e + static_strlen("LD_LIBRARY_PATH=");
  318. size_t npaths = 2; // One for the first entry, one for the last
  319. // NULL.
  320. for (const char * tmp = s ; *tmp ; tmp++)
  321. if (*tmp == ':')
  322. npaths++;
  323. char** paths = malloc(sizeof(const char *) *
  324. npaths);
  325. if (!paths)
  326. return -ENOMEM;
  327. size_t cnt = 0;
  328. while (*s) {
  329. const char * next;
  330. for (next = s ; *next && *next != ':' ; next++);
  331. size_t len = next - s;
  332. char * str = malloc(len + 1);
  333. if (!str) {
  334. for (size_t i = 0; i < cnt; i++)
  335. free(paths[i]);
  336. free(paths);
  337. return -ENOMEM;
  338. }
  339. memcpy(str, s, len);
  340. str[len] = 0;
  341. paths[cnt++] = str;
  342. s = *next ? next + 1 : next;
  343. }
  344. paths[cnt] = NULL;
  345. assert(!library_paths);
  346. library_paths = paths;
  347. return 0;
  348. }
  349. }
  350. return 0;
  351. }
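/* Example of the LD_LIBRARY_PATH handling above (the path values are purely
 * illustrative): with LD_LIBRARY_PATH=/lib:/usr/lib in envp, library_paths
 * ends up as the NULL-terminated array { "/lib", "/usr/lib", NULL }. */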
  352. struct config_store * root_config = NULL;
  353. static void * __malloc (size_t size)
  354. {
  355. return malloc(size);
  356. }
  357. static void __free (void * mem)
  358. {
  359. free(mem);
  360. }
  361. int init_manifest (PAL_HANDLE manifest_handle)
  362. {
  363. int ret = 0;
  364. void * addr = NULL;
  365. size_t size = 0, map_size = 0;
  366. #define MAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|VMA_INTERNAL)
  367. if (PAL_CB(manifest_preload.start)) {
  368. addr = PAL_CB(manifest_preload.start);
  369. size = PAL_CB(manifest_preload.end) - PAL_CB(manifest_preload.start);
  370. } else {
  371. PAL_STREAM_ATTR attr;
  372. if (!DkStreamAttributesQueryByHandle(manifest_handle, &attr))
  373. return -PAL_ERRNO;
  374. size = attr.pending_size;
  375. map_size = ALLOC_ALIGN_UP(size);
  376. addr = bkeep_unmapped_any(map_size, PROT_READ, MAP_FLAGS,
  377. 0, "manifest");
  378. if (!addr)
  379. return -ENOMEM;
  380. void* ret_addr = DkStreamMap(manifest_handle, addr, PAL_PROT_READ, 0, ALLOC_ALIGN_UP(size));
  381. if (!ret_addr) {
  382. bkeep_munmap(addr, map_size, MAP_FLAGS);
  383. return -ENOMEM;
  384. } else {
  385. assert(addr == ret_addr);
  386. }
  387. }
  388. struct config_store * new_root_config = malloc(sizeof(struct config_store));
  389. if (!new_root_config) {
  390. ret = -ENOMEM;
  391. goto fail;
  392. }
  393. new_root_config->raw_data = addr;
  394. new_root_config->raw_size = size;
  395. new_root_config->malloc = __malloc;
  396. new_root_config->free = __free;
  397. const char * errstring = "Unexpected error";
  398. if ((ret = read_config(new_root_config, NULL, &errstring)) < 0) {
  399. SYS_PRINTF("Unable to read manifest file: %s\n", errstring);
  400. goto fail;
  401. }
  402. root_config = new_root_config;
  403. return 0;
  404. fail:
  405. if (map_size) {
  406. DkStreamUnmap(addr, map_size);
  407. if (bkeep_munmap(addr, map_size, MAP_FLAGS) < 0)
  408. BUG();
  409. }
  410. free(new_root_config);
  411. return ret;
  412. }
  413. #ifdef PROFILE
  414. struct shim_profile profile_root;
  415. #endif
  416. # define FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
  417. do { \
  418. void *_tmp = (cookie); \
  419. (argv) = _tmp; \
  420. _tmp += sizeof(char *) * ((argc) + 1); \
  421. (envp) = _tmp; \
  422. for ( ; *(char **) _tmp; _tmp += sizeof(char *)); \
  423. (auxp) = _tmp + sizeof(char *); \
  424. } while (0)
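/* FIND_ARG_COMPONENTS assumes the argument block passed by the PAL is laid out
 * as argv[0..argc-1], a NULL terminator, envp[0..], another NULL terminator,
 * and then the ELF auxiliary vector; argc itself arrives separately as a
 * function argument rather than inside this block. */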
  425. #ifdef PROFILE
  426. static void set_profile_enabled (const char ** envp)
  427. {
  428. const char ** p;
  429. for (p = envp ; (*p) ; p++)
  430. if (strstartswith_static(*p, "PROFILE_ENABLED="))
  431. break;
  432. if (!(*p))
  433. return;
  434. for (size_t i = 0 ; i < N_PROFILE ; i++)
  435. PROFILES[i].disabled = true;
  436. const char * str = (*p) + 16;
  437. bool enabled = false;
  438. while (*str) {
  439. const char * next = str;
  440. for ( ; (*next) && (*next) != ',' ; next++);
  441. if (next > str) {
  442. size_t len = next - str;
  443. for (size_t i = 0 ; i < N_PROFILE ; i++) {
  444. struct shim_profile * profile = &PROFILES[i];
  445. if (!memcmp(profile->name, str, len) && !profile->name[len]) {
  446. profile->disabled = false;
  447. if (profile->type == CATEGORY)
  448. enabled = true;
  449. }
  450. }
  451. }
  452. str = (*next) ? next + 1 : next;
  453. }
  454. while (enabled) {
  455. enabled = false;
  456. for (size_t i = 0 ; i < N_PROFILE ; i++) {
  457. struct shim_profile * profile = &PROFILES[i];
  458. if (!profile->disabled || profile->root == &profile_)
  459. continue;
  460. if (!profile->root->disabled) {
  461. profile->disabled = false;
  462. if (profile->type == CATEGORY)
  463. enabled = true;
  464. }
  465. }
  466. }
  467. for (size_t i = 0 ; i < N_PROFILE ; i++) {
  468. struct shim_profile * profile = &PROFILES[i];
  469. if (profile->type == CATEGORY || profile->disabled)
  470. continue;
  471. for (profile = profile->root ;
  472. profile != &profile_ && profile->disabled ;
  473. profile = profile->root)
  474. profile->disabled = false;
  475. }
  476. }
  477. #endif
  478. static int init_newproc (struct newproc_header * hdr)
  479. {
  480. BEGIN_PROFILE_INTERVAL();
  481. PAL_NUM bytes = DkStreamRead(PAL_CB(parent_process), 0,
  482. sizeof(struct newproc_header), hdr,
  483. NULL, 0);
  484. if (bytes == PAL_STREAM_ERROR)
  485. return -PAL_ERRNO;
  486. SAVE_PROFILE_INTERVAL(child_wait_header);
  487. SAVE_PROFILE_INTERVAL_SINCE(child_receive_header, hdr->write_proc_time);
  488. return hdr->failure;
  489. }
  490. DEFINE_PROFILE_CATEGORY(pal, );
  491. DEFINE_PROFILE_INTERVAL(pal_startup_time, pal);
  492. DEFINE_PROFILE_INTERVAL(pal_host_specific_startup_time, pal);
  493. DEFINE_PROFILE_INTERVAL(pal_relocation_time, pal);
  494. DEFINE_PROFILE_INTERVAL(pal_linking_time, pal);
  495. DEFINE_PROFILE_INTERVAL(pal_manifest_loading_time, pal);
  496. DEFINE_PROFILE_INTERVAL(pal_allocation_time, pal);
  497. DEFINE_PROFILE_INTERVAL(pal_tail_startup_time, pal);
  498. DEFINE_PROFILE_INTERVAL(pal_child_creation_time, pal);
  499. DEFINE_PROFILE_CATEGORY(init, );
  500. DEFINE_PROFILE_INTERVAL(init_vma, init);
  501. DEFINE_PROFILE_INTERVAL(init_slab, init);
  502. DEFINE_PROFILE_INTERVAL(init_str_mgr, init);
  503. DEFINE_PROFILE_INTERVAL(init_internal_map, init);
  504. DEFINE_PROFILE_INTERVAL(init_rlimit, init);
  505. DEFINE_PROFILE_INTERVAL(init_fs, init);
  506. DEFINE_PROFILE_INTERVAL(init_dcache, init);
  507. DEFINE_PROFILE_INTERVAL(init_handle, init);
  508. DEFINE_PROFILE_INTERVAL(read_from_checkpoint, init);
  509. DEFINE_PROFILE_INTERVAL(read_from_file, init);
  510. DEFINE_PROFILE_INTERVAL(init_newproc, init);
  511. DEFINE_PROFILE_INTERVAL(init_mount_root, init);
  512. DEFINE_PROFILE_INTERVAL(init_from_checkpoint_file, init);
  513. DEFINE_PROFILE_INTERVAL(restore_from_file, init);
  514. DEFINE_PROFILE_INTERVAL(init_manifest, init);
  515. DEFINE_PROFILE_INTERVAL(init_ipc, init);
  516. DEFINE_PROFILE_INTERVAL(init_thread, init);
  517. DEFINE_PROFILE_INTERVAL(init_important_handles, init);
  518. DEFINE_PROFILE_INTERVAL(init_mount, init);
  519. DEFINE_PROFILE_INTERVAL(init_async, init);
  520. DEFINE_PROFILE_INTERVAL(init_stack, init);
  521. DEFINE_PROFILE_INTERVAL(read_environs, init);
  522. DEFINE_PROFILE_INTERVAL(init_loader, init);
  523. DEFINE_PROFILE_INTERVAL(init_ipc_helper, init);
  524. DEFINE_PROFILE_INTERVAL(init_signal, init);
  525. #define CALL_INIT(func, args ...) func(args)
  526. #define RUN_INIT(func, ...) \
  527. do { \
  528. int _err = CALL_INIT(func, ##__VA_ARGS__); \
  529. if (_err < 0) { \
  530. SYS_PRINTF("shim_init(): error in " #func " (%d)\n", _err); \
  531. shim_clean_and_exit(_err); \
  532. } \
  533. SAVE_PROFILE_INTERVAL(func); \
  534. } while (0)
  535. extern PAL_HANDLE thread_start_event;
  536. noreturn void* shim_init (int argc, void * args)
  537. {
  538. debug_handle = PAL_CB(debug_stream);
  539. cur_process.vmid = (IDTYPE) PAL_CB(process_id);
  540. /* create the initial TCB; the shim cannot run without a TCB */
  541. shim_tcb_init();
  542. update_fs_base(0);
  543. __disable_preempt(shim_get_tcb()); // Temporarily disable preemption to delay any signal
  544. // that arrives during initialization
  545. debug_setbuf(shim_get_tcb(), true);
  546. #ifdef PROFILE
  547. unsigned long begin_time = GET_PROFILE_INTERVAL();
  548. #endif
  549. debug("host: %s\n", PAL_CB(host_type));
  550. DkSetExceptionHandler(&handle_failure, PAL_EVENT_FAILURE);
  551. g_pal_alloc_align = PAL_CB(alloc_align);
  552. if (!IS_POWER_OF_2(g_pal_alloc_align)) {
  553. SYS_PRINTF("shim_init(): error: PAL allocation alignment not a power of 2\n");
  554. shim_clean_and_exit(-EINVAL);
  555. }
  556. if (!create_lock(&__master_lock)) {
  557. SYS_PRINTF("shim_init(): error: failed to allocate __master_lock\n");
  558. shim_clean_and_exit(-ENOMEM);
  559. }
  560. int * argcp = &argc;
  561. const char ** argv, ** envp, ** argp = NULL;
  562. elf_auxv_t * auxp;
  563. /* call to figure out where the arguments are */
  564. FIND_ARG_COMPONENTS(args, argc, argv, envp, auxp);
  565. #ifdef PROFILE
  566. set_profile_enabled(envp);
  567. #endif
  568. struct newproc_header hdr;
  569. void * cpaddr = NULL;
  570. #ifdef PROFILE
  571. unsigned long begin_create_time = 0;
  572. #endif
  573. BEGIN_PROFILE_INTERVAL();
  574. RUN_INIT(init_vma);
  575. RUN_INIT(init_slab);
  576. RUN_INIT(read_environs, envp);
  577. RUN_INIT(init_str_mgr);
  578. RUN_INIT(init_internal_map);
  579. RUN_INIT(init_rlimit);
  580. RUN_INIT(init_fs);
  581. RUN_INIT(init_dcache);
  582. RUN_INIT(init_handle);
  583. debug("shim loaded at %p, ready to initialize\n", &__load_address);
  584. if (argc && argv[0][0] == '-') {
  585. if (!strcmp_static(argv[0], "-resume") && argc >= 2) {
  586. const char * filename = *(argv + 1);
  587. argc -= 2;
  588. argv += 2;
  589. RUN_INIT(init_mount_root);
  590. RUN_INIT(init_from_checkpoint_file, filename, &hdr.checkpoint,
  591. &cpaddr);
  592. }
  593. }
  594. if (!cpaddr && PAL_CB(parent_process)) {
  595. RUN_INIT(init_newproc, &hdr);
  596. SAVE_PROFILE_INTERVAL_SET(child_created_in_new_process,
  597. hdr.create_time, begin_time);
  598. #ifdef PROFILE
  599. begin_create_time = hdr.begin_create_time;
  600. #endif
  601. if (hdr.checkpoint.hdr.size)
  602. RUN_INIT(do_migration, &hdr.checkpoint, &cpaddr);
  603. }
  604. if (cpaddr) {
  605. thread_start_event = DkNotificationEventCreate(PAL_FALSE);
  606. RUN_INIT(restore_checkpoint,
  607. &hdr.checkpoint.hdr, &hdr.checkpoint.mem,
  608. (ptr_t) cpaddr, 0);
  609. }
  610. if (PAL_CB(manifest_handle))
  611. RUN_INIT(init_manifest, PAL_CB(manifest_handle));
  612. RUN_INIT(init_mount_root);
  613. RUN_INIT(init_ipc);
  614. RUN_INIT(init_thread);
  615. RUN_INIT(init_mount);
  616. RUN_INIT(init_important_handles);
  617. RUN_INIT(init_async);
  618. RUN_INIT(init_stack, argv, envp, &argcp, &argp, &auxp);
  619. RUN_INIT(init_loader);
  620. RUN_INIT(init_ipc_helper);
  621. RUN_INIT(init_signal);
  622. if (PAL_CB(parent_process)) {
  623. /* Notify the parent process */
  624. struct newproc_response res;
  625. res.child_vmid = cur_process.vmid;
  626. res.failure = 0;
  627. PAL_NUM ret = DkStreamWrite(PAL_CB(parent_process), 0,
  628. sizeof(struct newproc_response),
  629. &res, NULL);
  630. if (ret == PAL_STREAM_ERROR)
  631. shim_do_exit(-PAL_ERRNO);
  632. /* Downgrade communication with parent to non-secure (only checkpoint recv is secure).
  633. * Currently only relevant to SGX PAL, other PALs ignore this. */
  634. PAL_STREAM_ATTR attr;
  635. if (!DkStreamAttributesQueryByHandle(PAL_CB(parent_process), &attr))
  636. shim_do_exit(-PAL_ERRNO);
  637. attr.secure = PAL_FALSE;
  638. if (!DkStreamAttributesSetByHandle(PAL_CB(parent_process), &attr))
  639. shim_do_exit(-PAL_ERRNO);
  640. }
  641. debug("shim process initialized\n");
  642. #ifdef PROFILE
  643. if (begin_create_time)
  644. SAVE_PROFILE_INTERVAL_SINCE(child_total_migration_time,
  645. begin_create_time);
  646. #endif
  647. SAVE_PROFILE_INTERVAL_SET(pal_startup_time, 0, pal_control.startup_time);
  648. SAVE_PROFILE_INTERVAL_SET(pal_host_specific_startup_time, 0,
  649. pal_control.host_specific_startup_time);
  650. SAVE_PROFILE_INTERVAL_SET(pal_relocation_time, 0,
  651. pal_control.relocation_time);
  652. SAVE_PROFILE_INTERVAL_SET(pal_linking_time, 0, pal_control.linking_time);
  653. SAVE_PROFILE_INTERVAL_SET(pal_manifest_loading_time, 0,
  654. pal_control.manifest_loading_time);
  655. SAVE_PROFILE_INTERVAL_SET(pal_allocation_time, 0,
  656. pal_control.allocation_time);
  657. SAVE_PROFILE_INTERVAL_SET(pal_tail_startup_time, 0,
  658. pal_control.tail_startup_time);
  659. SAVE_PROFILE_INTERVAL_SET(pal_child_creation_time, 0,
  660. pal_control.child_creation_time);
  661. if (thread_start_event)
  662. DkEventSet(thread_start_event);
  663. shim_tcb_t * cur_tcb = shim_get_tcb();
  664. struct shim_thread * cur_thread = (struct shim_thread *) cur_tcb->tp;
  665. if (cur_tcb->context.regs && cur_tcb->context.regs->rsp) {
  666. vdso_map_migrate();
  667. restore_context(&cur_tcb->context);
  668. }
  669. if (cur_thread->exec)
  670. execute_elf_object(cur_thread->exec, argcp, argp, auxp);
  671. shim_do_exit(0);
  672. }
  673. static int create_unique (int (*mkname) (char *, size_t, void *),
  674. int (*create) (const char *, void *),
  675. int (*output) (char *, size_t, const void *,
  676. struct shim_qstr *),
  677. char * name, size_t size, void * id, void * obj,
  678. struct shim_qstr * qstr)
  679. {
  680. int ret, len;
  681. while (1) {
  682. len = mkname(name, size, id);
  683. if (len < 0)
  684. return len;
  685. if ((ret = create(name, obj)) < 0)
  686. return ret;
  687. if (ret)
  688. continue;
  689. if (output)
  690. return output(name, size, id, qstr);
  691. if (qstr)
  692. qstrsetstr(qstr, name, len);
  693. return len;
  694. }
  695. }
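/* Retry protocol implemented by create_unique(): mkname() produces a candidate
 * name, create() returns a negative error to abort, a positive value to signal
 * a name collision (so another candidate is generated), or 0 on success; the
 * final name is then published via output() or stored into the qstr. */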
  696. static int name_pipe_rand (char * uri, size_t size, void * id)
  697. {
  698. IDTYPE pipeid;
  699. size_t len;
  700. int ret = DkRandomBitsRead(&pipeid, sizeof(pipeid));
  701. if (ret < 0)
  702. return -convert_pal_errno(-ret);
  703. debug("creating pipe: " URI_PREFIX_PIPE_SRV "%u\n", pipeid);
  704. if ((len = snprintf(uri, size, URI_PREFIX_PIPE_SRV "%u", pipeid)) >= size)
  705. return -ERANGE;
  706. *((IDTYPE *)id) = pipeid;
  707. return len;
  708. }
  709. static int name_pipe_vmid (char * uri, size_t size, void * id)
  710. {
  711. IDTYPE pipeid = cur_process.vmid;
  712. size_t len;
  713. debug("creating pipe: " URI_PREFIX_PIPE_SRV "%u\n", pipeid);
  714. if ((len = snprintf(uri, size, URI_PREFIX_PIPE_SRV "%u", pipeid)) >= size)
  715. return -ERANGE;
  716. *((IDTYPE *)id) = pipeid;
  717. return len;
  718. }
  719. static int open_pipe (const char * uri, void * obj)
  720. {
  721. PAL_HANDLE pipe = DkStreamOpen(uri, 0, 0, 0, 0);
  722. if (!pipe)
  723. return PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST ? 1 :
  724. -PAL_ERRNO;
  725. if (obj)
  726. *((PAL_HANDLE *) obj) = pipe;
  727. else
  728. DkObjectClose(pipe);
  729. return 0;
  730. }
  731. static int pipe_addr (char * uri, size_t size, const void * id,
  732. struct shim_qstr * qstr)
  733. {
  734. IDTYPE pipeid = *((IDTYPE *) id);
  735. size_t len;
  736. if ((len = snprintf(uri, size, URI_PREFIX_PIPE "%u", pipeid)) >= size)
  737. return -ERANGE;
  738. if (qstr)
  739. qstrsetstr(qstr, uri, len);
  740. return len;
  741. }
  742. int create_pipe (IDTYPE * id, char * uri, size_t size, PAL_HANDLE * hdl,
  743. struct shim_qstr * qstr, bool use_vmid_for_name)
  744. {
  745. IDTYPE pipeid;
  746. int ret;
  747. if (use_vmid_for_name)
  748. ret = create_unique(&name_pipe_vmid, &open_pipe, &pipe_addr,
  749. uri, size, &pipeid, hdl, qstr);
  750. else
  751. ret = create_unique(&name_pipe_rand, &open_pipe, &pipe_addr,
  752. uri, size, &pipeid, hdl, qstr);
  753. if (ret > 0 && id)
  754. *id = pipeid;
  755. return ret;
  756. }
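/* Rough usage sketch for create_pipe() (parameter values are illustrative):
 * with use_vmid_for_name == false it retries random pipe ids until the
 * URI_PREFIX_PIPE_SRV listening stream opens, stores the id in *id and the
 * listening PAL handle in *hdl, and returns the length of the client-side
 * URI (URI_PREFIX_PIPE followed by the id) written into uri. */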
  757. static int name_path (char * path, size_t size, void * id)
  758. {
  759. unsigned int suffix;
  760. int prefix_len = strlen(path);
  761. size_t len;
  762. int ret = DkRandomBitsRead(&suffix, sizeof(suffix));
  763. if (ret < 0)
  764. return -convert_pal_errno(-ret);
  765. len = snprintf(path + prefix_len, size - prefix_len, "%08x", suffix);
  766. if (len >= size - prefix_len)
  767. return -ERANGE;
  768. *((unsigned int *) id) = suffix;
  769. return prefix_len + len;
  770. }
  771. static int open_dir (const char * path, void * obj)
  772. {
  773. struct shim_handle * dir = NULL;
  774. if (obj) {
  775. dir = get_new_handle();
  776. if (!dir)
  777. return -ENOMEM;
  778. }
  779. int ret = open_namei(dir, NULL, path, O_CREAT|O_EXCL|O_DIRECTORY, 0700,
  780. NULL);
  781. if (ret < 0)
  782. return ret == -EEXIST ? 1 : ret;
  783. if (obj)
  784. *((struct shim_handle **) obj) = dir;
  785. return 0;
  786. }
  787. static int open_file (const char * path, void * obj)
  788. {
  789. struct shim_handle * file = NULL;
  790. if (obj) {
  791. file = get_new_handle();
  792. if (!file)
  793. return -ENOMEM;
  794. }
  795. int ret = open_namei(file, NULL, path, O_CREAT|O_EXCL|O_RDWR, 0600,
  796. NULL);
  797. if (ret < 0)
  798. return ret == -EEXIST ? 1 : ret;
  799. if (obj)
  800. *((struct shim_handle **) obj) = file;
  801. return 0;
  802. }
  803. static int open_pal_handle (const char * uri, void * obj)
  804. {
  805. PAL_HANDLE hdl;
  806. if (strstartswith_static(uri, URI_PREFIX_DEV))
  807. hdl = DkStreamOpen(uri, 0,
  808. PAL_SHARE_OWNER_X|PAL_SHARE_OWNER_W|
  809. PAL_SHARE_OWNER_R,
  810. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  811. 0);
  812. else
  813. hdl = DkStreamOpen(uri, PAL_ACCESS_RDWR,
  814. PAL_SHARE_OWNER_W|PAL_SHARE_OWNER_R,
  815. PAL_CREATE_TRY|PAL_CREATE_ALWAYS,
  816. 0);
  817. if (!hdl) {
  818. if (PAL_NATIVE_ERRNO == PAL_ERROR_STREAMEXIST)
  819. return 0;
  820. else
  821. return -PAL_ERRNO;
  822. }
  823. if (obj) {
  824. *((PAL_HANDLE *) obj) = hdl;
  825. } else {
  826. DkObjectClose(hdl);
  827. }
  828. return 0;
  829. }
  830. static int output_path (char * path, size_t size, const void * id,
  831. struct shim_qstr * qstr)
  832. {
  833. size_t len = strlen(path);
  834. // API compatibility
  835. __UNUSED(size);
  836. __UNUSED(id);
  837. if (qstr)
  838. qstrsetstr(qstr, path, len);
  839. return len;
  840. }
  841. int create_dir (const char * prefix, char * path, size_t size,
  842. struct shim_handle ** hdl)
  843. {
  844. unsigned int suffix;
  845. if (prefix) {
  846. size_t len = strlen(prefix);
  847. if (len >= size)
  848. return -ERANGE;
  849. memcpy(path, prefix, len + 1);
  850. }
  851. return create_unique(&name_path, &open_dir, &output_path, path, size,
  852. &suffix, hdl, NULL);
  853. }
  854. int create_file (const char * prefix, char * path, size_t size,
  855. struct shim_handle ** hdl)
  856. {
  857. unsigned int suffix;
  858. if (prefix) {
  859. size_t len = strlen(prefix);
  860. if (len >= size)
  861. return -ERANGE;
  862. memcpy(path, prefix, len + 1);
  863. }
  864. return create_unique(&name_path, &open_file, &output_path, path, size,
  865. &suffix, hdl, NULL);
  866. }
  867. int create_handle (const char * prefix, char * uri, size_t size,
  868. PAL_HANDLE * hdl, unsigned int * id)
  869. {
  870. unsigned int suffix;
  871. if (prefix) {
  872. size_t len = strlen(prefix);
  873. if (len >= size)
  874. return -ERANGE;
  875. memcpy(uri, prefix, len + 1);
  876. }
  877. return create_unique(&name_path, &open_pal_handle, &output_path, uri, size,
  878. id ? : &suffix, hdl, NULL);
  879. }
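/* Hypothetical usage of the helpers above (the buffer size and "/tmp/shim-"
 * prefix are illustrative, not taken from callers in this file):
 *
 *   char path[64];
 *   struct shim_handle * hdl = NULL;
 *   int ret = create_dir("/tmp/shim-", path, sizeof(path), &hdl);
 *   // On success, ret is the length of the generated path, which consists of
 *   // the prefix followed by 8 random hex digits, and hdl refers to the
 *   // newly created directory.
 */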
  880. void check_stack_hook (void)
  881. {
  882. struct shim_thread * cur_thread = get_cur_thread();
  883. void * rsp;
  884. __asm__ volatile ("movq %%rsp, %0" : "=r"(rsp) :: "memory");
  885. if (rsp <= cur_thread->stack_top && rsp > cur_thread->stack) {
  886. if ((uintptr_t)rsp - (uintptr_t)cur_thread->stack < PAL_CB(alloc_align))
  887. SYS_PRINTF("*** stack is almost drained (RSP = %p, stack = %p-%p) ***\n",
  888. rsp, cur_thread->stack, cur_thread->stack_top);
  889. } else {
  890. SYS_PRINTF("*** context mismatched with thread stack (RSP = %p, stack = %p-%p) ***\n",
  891. rsp, cur_thread->stack, cur_thread->stack_top);
  892. }
  893. }
  894. #ifdef PROFILE
  895. static void print_profile_result (PAL_HANDLE hdl, struct shim_profile * root,
  896. int level)
  897. {
  898. unsigned long total_interval_time = 0;
  899. unsigned long total_interval_count = 0;
  900. for (size_t i = 0 ; i < N_PROFILE ; i++) {
  901. struct shim_profile * profile = &PROFILES[i];
  902. if (profile->root != root || profile->disabled)
  903. continue;
  904. switch (profile->type) {
  905. case OCCURENCE: {
  906. unsigned int count =
  907. atomic_read(&profile->val.occurence.count);
  908. if (count) {
  909. for (int j = 0 ; j < level ; j++)
  910. __SYS_FPRINTF(hdl, " ");
  911. __SYS_FPRINTF(hdl, "- %s: %u times\n", profile->name, count);
  912. }
  913. break;
  914. }
  915. case INTERVAL: {
  916. unsigned int count =
  917. atomic_read(&profile->val.interval.count);
  918. if (count) {
  919. unsigned long time =
  920. atomic_read(&profile->val.interval.time);
  921. unsigned long ind_time = time / count;
  922. total_interval_time += time;
  923. total_interval_count += count;
  924. for (int j = 0 ; j < level ; j++)
  925. __SYS_FPRINTF(hdl, " ");
  926. __SYS_FPRINTF(hdl, "- (%11.11lu) %s: %u times, %lu msec\n",
  927. time, profile->name, count, ind_time);
  928. }
  929. break;
  930. }
  931. case CATEGORY:
  932. for (int j = 0 ; j < level ; j++)
  933. __SYS_FPRINTF(hdl, " ");
  934. __SYS_FPRINTF(hdl, "- %s:\n", profile->name);
  935. print_profile_result(hdl, profile, level + 1);
  936. break;
  937. }
  938. }
  939. if (total_interval_count) {
  940. __SYS_FPRINTF(hdl, " - (%11.11lu) total: %lu times, %lu msec\n",
  941. total_interval_time, total_interval_count,
  942. total_interval_time / total_interval_count);
  943. }
  944. }
  945. #endif /* PROFILE */
  946. noreturn void shim_clean_and_exit(int exit_code) {
  947. static int in_terminate = 0;
  948. if (__atomic_add_fetch(&in_terminate, 1, __ATOMIC_RELAXED) > 1) {
  949. while (true) {
  950. /* nothing */
  951. }
  952. }
  953. cur_process.exit_code = exit_code;
  954. store_all_msg_persist();
  955. #ifdef PROFILE
  956. if (ENTER_TIME) {
  957. switch (shim_get_tcb()->context.orig_rax) {
  958. case __NR_exit_group:
  959. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit_group, ENTER_TIME);
  960. break;
  961. case __NR_exit:
  962. SAVE_PROFILE_INTERVAL_SINCE(syscall_exit, ENTER_TIME);
  963. break;
  964. }
  965. }
  966. if (ipc_cld_profile_send()) {
  967. MASTER_LOCK();
  968. PAL_HANDLE hdl = __open_shim_stdio();
  969. if (hdl) {
  970. __SYS_FPRINTF(hdl, "******************************\n");
  971. __SYS_FPRINTF(hdl, "profiling:\n");
  972. print_profile_result(hdl, &profile_root, 0);
  973. __SYS_FPRINTF(hdl, "******************************\n");
  974. }
  975. MASTER_UNLOCK();
  976. DkObjectClose(hdl);
  977. }
  978. #endif
  979. del_all_ipc_ports();
  980. if (shim_stdio && shim_stdio != (PAL_HANDLE) -1)
  981. DkObjectClose(shim_stdio);
  982. shim_stdio = NULL;
  983. debug("process %u exited with status %d\n", cur_process.vmid & 0xFFFF, cur_process.exit_code);
  984. MASTER_LOCK();
  985. DkProcessExit(cur_process.exit_code);
  986. }
  987. int message_confirm (const char * message, const char * options)
  988. {
  989. char answer;
  990. int noptions = strlen(options);
  991. char * option_str = __alloca(noptions * 2 + 3), * str = option_str;
  992. int ret = 0;
  993. *(str++) = ' ';
  994. *(str++) = '[';
  995. for (int i = 0 ; i < noptions ; i++) {
  996. *(str++) = options[i];
  997. *(str++) = '/';
  998. }
  999. str--;
  1000. *(str++) = ']';
  1001. *(str++) = ' ';
  1002. MASTER_LOCK();
  1003. PAL_HANDLE hdl = __open_shim_stdio();
  1004. if (!hdl) {
  1005. MASTER_UNLOCK();
  1006. return -EACCES;
  1007. }
  1008. PAL_NUM pal_ret;
  1009. pal_ret = DkStreamWrite(hdl, 0, strlen(message), (void*)message, NULL);
  1010. if (pal_ret == PAL_STREAM_ERROR) {
  1011. ret = -PAL_ERRNO;
  1012. goto out;
  1013. }
  1014. pal_ret = DkStreamWrite(hdl, 0, noptions * 2 + 3, option_str, NULL);
  1015. if (pal_ret == PAL_STREAM_ERROR) {
  1016. ret = -PAL_ERRNO;
  1017. goto out;
  1018. }
  1019. pal_ret = DkStreamRead(hdl, 0, 1, &answer, NULL, 0);
  1020. if (pal_ret == PAL_STREAM_ERROR) {
  1021. ret = -PAL_ERRNO;
  1022. goto out;
  1023. }
  1024. out:
  1025. DkObjectClose(hdl);
  1026. MASTER_UNLOCK();
  1027. return (ret < 0) ? ret : answer;
  1028. }