db_main.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * db_main.c
  15. *
  16. * This file contains the main function of the PAL loader, which loads and
  17. * processes environment, arguments and manifest.
  18. */
  19. #include "pal_defs.h"
  20. #include "pal_linux_defs.h"
  21. #include "pal.h"
  22. #include "pal_internal.h"
  23. #include "pal_linux.h"
  24. #include "pal_debug.h"
  25. #include "pal_error.h"
  26. #include "pal_security.h"
  27. #include "api.h"
  28. #include <asm/mman.h>
  29. #include <asm/ioctls.h>
  30. #include <asm/errno.h>
  31. #include <elf/elf.h>
  32. #include <sysdeps/generic/ldsodefs.h>
  /*
   * Process entry stub.
   *
   * At the beginning of the entry point, rsp points at argc, followed by
   * argv, envp and the auxiliary vector. The original rsp is copied into rdi
   * before the stack pointer is modified, so that pal_linux_main() receives a
   * pointer to that initial stack layout and it is not disturbed by the
   * function call itself.
   */
  __asm__ (".global pal_start\n"
           " .type pal_start,@function\n"
           "pal_start:\n"
           /* save the initial stack pointer as the argument */
           " movq %rsp, %rdi\n"
           /* round rsp down to a multiple of 16 before the call */
           " andq $~15, %rsp\n"
           " call pal_linux_main\n");
  /* NOTE(review): presumably consumed by the dynamic-link headers included
   * further down in this file -- confirm against dynamic_link.h. */
  #define RTLD_BOOTSTRAP

  /* pal_start is the entry point of libpal.so; it calls pal_linux_main */
  #define _ENTRY pal_start

  /*
   * In DEBUG builds, emit an entry into the ".debug_gdb_scripts" section that
   * names the pal-gdb.py script.
   * NOTE(review): the leading ".byte 1" is presumably GDB's entry-type tag for
   * a Python script file -- confirm against the GDB manual.
   */
  #ifdef DEBUG
  __asm__ (".pushsection \".debug_gdb_scripts\", \"MS\",@progbits,1\r\n"
           ".byte 1\r\n"
           ".asciz \"" PAL_FILE("host/Linux/pal-gdb.py") "\"\r\n"
           ".popsection\r\n");
  #endif

  /* Host-specific state of this PAL instance (environment, ids, ...). */
  struct pal_linux_state linux_state;
  struct pal_sec pal_sec;

  /* Page size; starts at PRESET_PAGESIZE and is replaced by the AT_PAGESZ
   * auxv entry in pal_init_bootstrap(). */
  static size_t g_page_size = PRESET_PAGESIZE;

  /* Accumulated with XOR from the AT_UID/AT_EUID and AT_GID/AT_EGID auxv
   * entries respectively (see pal_init_bootstrap()). */
  static int uid, gid;

  #if USE_VDSO_GETTIME == 1
  /* Value of the AT_SYSINFO_EHDR auxv entry; stays 0 if the entry is absent. */
  static ElfW(Addr) sysinfo_ehdr;
  #endif
  59. static void pal_init_bootstrap (void * args, const char ** pal_name,
  60. int * pargc,
  61. const char *** pargv,
  62. const char *** penvp)
  63. {
  64. /*
  65. * fetch arguments and environment variables, the previous stack
  66. * pointer is in rdi (arg). The stack structure starting at rdi
  67. * will look like:
  68. * auxv[m - 1] = AT_NULL
  69. * ...
  70. * auxv[0]
  71. * envp[n - 1] = NULL
  72. * ...
  73. * envp[0]
  74. * argv[argc] = NULL
  75. * argv[argc - 1]
  76. * ...
  77. * argv[0]
  78. * argc
  79. * ---------------------------------------
  80. * user stack
  81. */
  82. const char ** all_args = (const char **) args;
  83. int argc = (uintptr_t) all_args[0];
  84. const char ** argv = &all_args[1];
  85. const char ** envp = argv + argc + 1;
  86. /* fetch environment information from aux vectors */
  87. const char ** e = envp;
  88. #ifdef DEBUG
  89. for (; *e ; e++)
  90. if ((*e)[0] == 'I' && (*e)[1] == 'N' && (*e)[2] == '_' &&
  91. (*e)[3] == 'G' && (*e)[4] == 'D' && (*e)[5] == 'B' &&
  92. (*e)[6] == '=' && (*e)[7] == '1' && !(*e)[8])
  93. linux_state.in_gdb = true;
  94. #else
  95. for (; *e ; e++);
  96. #endif
  97. ElfW(auxv_t) *av;
  98. for (av = (ElfW(auxv_t) *) (e + 1) ; av->a_type != AT_NULL ; av++)
  99. switch (av->a_type) {
  100. case AT_PAGESZ:
  101. g_page_size = av->a_un.a_val;
  102. break;
  103. case AT_UID:
  104. case AT_EUID:
  105. uid ^= av->a_un.a_val;
  106. break;
  107. case AT_GID:
  108. case AT_EGID:
  109. gid ^= av->a_un.a_val;
  110. break;
  111. #if USE_VDSO_GETTIME == 1
  112. case AT_SYSINFO_EHDR:
  113. sysinfo_ehdr = av->a_un.a_val;
  114. break;
  115. #endif
  116. }
  117. *pal_name = argv[0];
  118. argv++;
  119. argc--;
  120. *pargc = argc;
  121. *pargv = argv;
  122. *penvp = envp;
  123. }
  124. unsigned long _DkGetPagesize (void)
  125. {
  126. return g_page_size;
  127. }
  128. unsigned long _DkGetAllocationAlignment (void)
  129. {
  130. return g_page_size;
  131. }
  132. void _DkGetAvailableUserAddressRange (PAL_PTR * start, PAL_PTR * end,
  133. PAL_PTR * hole_start, PAL_PTR * hole_end)
  134. {
  135. void* end_addr = (void*)ALLOC_ALIGN_DOWN_PTR(TEXT_START);
  136. void* start_addr = (void*)USER_ADDRESS_LOWEST;
  137. assert(IS_ALLOC_ALIGNED_PTR(start_addr) && IS_ALLOC_ALIGNED_PTR(end_addr));
  138. while (1) {
  139. if (start_addr >= end_addr)
  140. INIT_FAIL(PAL_ERROR_NOMEM, "no user memory available");
  141. void * mem = (void *) ARCH_MMAP(start_addr,
  142. pal_state.alloc_align,
  143. PROT_NONE,
  144. MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
  145. -1, 0);
  146. if (!IS_ERR_P(mem)) {
  147. INLINE_SYSCALL(munmap, 2, mem, pal_state.alloc_align);
  148. if (mem == start_addr)
  149. break;
  150. }
  151. start_addr = (void *) ((unsigned long) start_addr << 1);
  152. }
  153. *end = (PAL_PTR) end_addr;
  154. *start = (PAL_PTR) start_addr;
  155. // Not used, so set it to an empty range.
  156. *hole_start = start_addr;
  157. *hole_end = start_addr;
  158. }
  159. PAL_NUM _DkGetProcessId (void)
  160. {
  161. return linux_state.process_id;
  162. }
  163. PAL_NUM _DkGetHostId (void)
  164. {
  165. return 0;
  166. }
  #include "dynamic_link.h"

  /* Implemented outside this file; pal_linux_main() calls it with &pal_map
   * once the first thread's TCB is set up. */
  void setup_pal_map (struct link_map * map);

  #if USE_VDSO_GETTIME == 1
  /* Implemented outside this file; pal_linux_main() calls it with the
   * AT_SYSINFO_EHDR value when that value is non-zero. */
  void setup_vdso_map (ElfW(Addr) addr);
  #endif

  /* Link map for the PAL binary itself; l_addr, l_name and l_info are filled
   * in by pal_linux_main(). */
  static struct link_map pal_map;

  /* Only x86-64 is supported; any other architecture is a build error. */
  #ifdef __x86_64__
  # include "elf-x86_64.h"
  #else
  # error "unsupported architecture"
  #endif
  178. void pal_linux_main (void * args)
  179. {
  180. const char * pal_name = NULL;
  181. PAL_HANDLE parent = NULL, exec = NULL, manifest = NULL;
  182. const char ** argv, ** envp;
  183. int argc;
  184. PAL_HANDLE first_thread;
  185. unsigned long start_time = _DkSystemTimeQueryEarly();
  186. /* parse argc, argv, envp and auxv */
  187. pal_init_bootstrap(args, &pal_name, &argc, &argv, &envp);
  188. pal_map.l_addr = elf_machine_load_address();
  189. pal_map.l_name = pal_name;
  190. elf_get_dynamic_info((void *) pal_map.l_addr + elf_machine_dynamic(),
  191. pal_map.l_info, pal_map.l_addr);
  192. ELF_DYNAMIC_RELOCATE(&pal_map);
  193. linux_state.environ = envp;
  194. init_slab_mgr(g_page_size);
  195. first_thread = malloc(HANDLE_SIZE(thread));
  196. if (!first_thread)
  197. INIT_FAIL(PAL_ERROR_NOMEM, "Out of memory");
  198. SET_HANDLE_TYPE(first_thread, thread);
  199. first_thread->thread.tid = INLINE_SYSCALL(gettid, 0);
  200. void * alt_stack = calloc(1, ALT_STACK_SIZE);
  201. if (!alt_stack)
  202. INIT_FAIL(PAL_ERROR_NOMEM, "Out of memory");
  203. first_thread->thread.stack = alt_stack;
  204. // Initialize TCB at the top of the alternative stack.
  205. PAL_TCB_LINUX * tcb = alt_stack + ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX);
  206. tcb->common.self = &tcb->common;
  207. tcb->handle = first_thread;
  208. tcb->alt_stack = alt_stack; // Stack bottom
  209. tcb->callback = NULL;
  210. tcb->param = NULL;
  211. pal_thread_init(tcb);
  212. setup_pal_map(&pal_map);
  213. #if USE_VDSO_GETTIME == 1
  214. if (sysinfo_ehdr)
  215. setup_vdso_map(sysinfo_ehdr);
  216. #endif
  217. pal_state.start_time = start_time;
  218. init_child_process(&parent, &exec, &manifest);
  219. if (!pal_sec.process_id)
  220. pal_sec.process_id = INLINE_SYSCALL(getpid, 0);
  221. linux_state.pid = pal_sec.process_id;
  222. linux_state.uid = uid;
  223. linux_state.gid = gid;
  224. linux_state.process_id = (start_time & (~0xffff)) | linux_state.pid;
  225. if (!linux_state.parent_process_id)
  226. linux_state.parent_process_id = linux_state.process_id;
  227. if (parent)
  228. goto done_init;
  229. int fd = INLINE_SYSCALL(open, 3, argv[0], O_RDONLY|O_CLOEXEC, 0);
  230. if (IS_ERR(fd)) {
  231. // DEP 10/20/16: Don't silently swallow permission errors
  232. // accessing the manifest
  233. if (fd == -13) {
  234. printf("Warning: Attempt to open file %s failed with permission denied\n", argv[0]);
  235. }
  236. goto done_init;
  237. }
  238. size_t len = strlen(argv[0]) + 1;
  239. PAL_HANDLE file = malloc(HANDLE_SIZE(file) + len);
  240. SET_HANDLE_TYPE(file, file);
  241. HANDLE_HDR(file)->flags |= RFD(0)|WFD(0);
  242. file->file.fd = fd;
  243. file->file.map_start = NULL;
  244. char * path = (void *) file + HANDLE_SIZE(file);
  245. int ret = get_norm_path(argv[0], path, &len);
  246. if (ret < 0) {
  247. printf("Could not normalize path (%s): %s\n", argv[0], pal_strerror(ret));
  248. goto done_init;
  249. }
  250. file->file.realpath = path;
  251. if (!check_elf_object(file)) {
  252. exec = file;
  253. goto done_init;
  254. }
  255. manifest = file;
  256. done_init:
  257. if (!parent && !exec && !manifest) {
  258. printf("Executable not found\n");
  259. printf("USAGE: %s [executable|manifest] args ...\n", pal_name);
  260. _DkProcessExit(0);
  261. }
  262. signal_setup();
  263. /* call to main function */
  264. pal_main((PAL_NUM) linux_state.parent_process_id,
  265. manifest, exec, NULL, parent, first_thread, argv, envp);
  266. }
  267. /* the following code is borrowed from CPUID */
  268. void cpuid (unsigned int leaf, unsigned int subleaf,
  269. unsigned int words[])
  270. {
  271. __asm__ ("cpuid"
  272. : "=a" (words[PAL_CPUID_WORD_EAX]),
  273. "=b" (words[PAL_CPUID_WORD_EBX]),
  274. "=c" (words[PAL_CPUID_WORD_ECX]),
  275. "=d" (words[PAL_CPUID_WORD_EDX])
  276. : "a" (leaf),
  277. "c" (subleaf));
  278. }
  /*
   * Store the four bytes of the 32-bit word `w` into s[0..3], least
   * significant byte first. `w` is evaluated once; `s` is evaluated several
   * times and must not have side effects. Wrapped in do { } while (0) so the
   * macro behaves as a single statement (e.g. under an unbraced `if`).
   */
  #define FOUR_CHARS_VALUE(s, w)                          \
      do {                                                \
          unsigned int four_chars_w_ = (w);               \
          (s)[0] = four_chars_w_ & 0xff;                  \
          (s)[1] = (four_chars_w_ >> 8) & 0xff;           \
          (s)[2] = (four_chars_w_ >> 16) & 0xff;          \
          (s)[3] = (four_chars_w_ >> 24) & 0xff;          \
      } while (0)

  /* Width, in bits, of the words the helpers below operate on. */
  #define BPI 32

  /* 2 to the given power, as unsigned long long. */
  #define POWER2(power) \
      (1ULL << (power))

  /* Mask with the lowest `width` bits set; all ones when width >= BPI. */
  #define RIGHTMASK(width) \
      (((unsigned long)(width) >= BPI) ? ~0ULL : POWER2(width) - 1ULL)

  /* Extract bits [start, after) of `value`, shifted down to bit 0. */
  #define BIT_EXTRACT_LE(value, start, after) \
      (((unsigned long)(value) & RIGHTMASK(after)) >> (start))
  /*
   * Names of the feature flags reported in EDX of CPUID leaf 1, indexed by
   * bit position. A NULL entry marks a bit that has no name and is skipped
   * when the flag string is built.
   */
  static char * cpu_flags[] = {
      [0]  = "fpu",      /* x87 FPU on chip */
      [1]  = "vme",      /* virtual-8086 mode enhancement */
      [2]  = "de",       /* debugging extensions */
      [3]  = "pse",      /* page size extensions */
      [4]  = "tsc",      /* time stamp counter */
      [5]  = "msr",      /* RDMSR and WRMSR support */
      [6]  = "pae",      /* physical address extensions */
      [7]  = "mce",      /* machine check exception */
      [8]  = "cx8",      /* CMPXCHG8B instruction */
      [9]  = "apic",     /* APIC on chip */
      [10] = NULL,
      [11] = "sep",      /* SYSENTER and SYSEXIT */
      [12] = "mtrr",     /* memory type range registers */
      [13] = "pge",      /* PTE global bit */
      [14] = "mca",      /* machine check architecture */
      [15] = "cmov",     /* conditional move/compare instruction */
      [16] = "pat",      /* page attribute table */
      [17] = "pse36",    /* page size extension */
      [18] = "pn",       /* processor serial number */
      [19] = "clflush",  /* CLFLUSH instruction */
      [20] = NULL,
      [21] = "dts",      /* debug store */
      [22] = "acpi",     /* onboard thermal control */
      [23] = "mmx",      /* MMX technology */
      [24] = "fxsr",     /* FXSAVE/FXRSTOR */
      [25] = "sse",      /* SSE extensions */
      [26] = "sse2",     /* SSE2 extensions */
      [27] = "ss",       /* self snoop */
      [28] = "ht",       /* hyper-threading / multi-core supported */
      [29] = "tm",       /* thermal monitor */
      [30] = "ia64",     /* IA64 */
      [31] = "pbe",      /* pending break event */
  };
  325. /*
  326. * Returns the number of online CPUs read from /sys/devices/system/cpu/online, -errno on failure.
  327. * Understands complex formats like "1,3-5,6".
  328. */
  329. int get_cpu_count(void) {
  330. int fd = INLINE_SYSCALL(open, 3, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC, 0);
  331. if (fd < 0)
  332. return unix_to_pal_error(ERRNO(fd));
  333. char buf[64];
  334. int ret = INLINE_SYSCALL(read, 3, fd, buf, sizeof(buf) - 1);
  335. INLINE_SYSCALL(close, 1, fd);
  336. if (ret < 0) {
  337. return unix_to_pal_error(ERRNO(ret));
  338. }
  339. buf[ret] = '\0'; /* ensure null-terminated buf even in partial read */
  340. char* end;
  341. char* ptr = buf;
  342. int cpu_count = 0;
  343. while (*ptr) {
  344. while (*ptr == ' ' || *ptr == '\t' || *ptr == ',')
  345. ptr++;
  346. int firstint = (int)strtol(ptr, &end, 10);
  347. if (ptr == end)
  348. break;
  349. if (*end == '\0' || *end == ',') {
  350. /* single CPU index, count as one more CPU */
  351. cpu_count++;
  352. } else if (*end == '-') {
  353. /* CPU range, count how many CPUs in range */
  354. ptr = end + 1;
  355. int secondint = (int)strtol(ptr, &end, 10);
  356. if (secondint > firstint)
  357. cpu_count += secondint - firstint + 1; // inclusive (e.g., 0-7, or 8-16)
  358. }
  359. ptr = end;
  360. }
  361. if (cpu_count == 0)
  362. return -PAL_ERROR_STREAMNOTEXIST;
  363. return cpu_count;
  364. }
  365. int _DkGetCPUInfo (PAL_CPU_INFO * ci)
  366. {
  367. unsigned int words[PAL_CPUID_WORD_NUM];
  368. int rv = 0;
  369. const size_t VENDOR_ID_SIZE = 13;
  370. char* vendor_id = malloc(VENDOR_ID_SIZE);
  371. cpuid(0, 0, words);
  372. FOUR_CHARS_VALUE(&vendor_id[0], words[PAL_CPUID_WORD_EBX]);
  373. FOUR_CHARS_VALUE(&vendor_id[4], words[PAL_CPUID_WORD_EDX]);
  374. FOUR_CHARS_VALUE(&vendor_id[8], words[PAL_CPUID_WORD_ECX]);
  375. vendor_id[VENDOR_ID_SIZE - 1] = '\0';
  376. ci->cpu_vendor = vendor_id;
  377. const size_t BRAND_SIZE = 49;
  378. char* brand = malloc(BRAND_SIZE);
  379. cpuid(0x80000002, 0, words);
  380. memcpy(&brand[ 0], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM);
  381. cpuid(0x80000003, 0, words);
  382. memcpy(&brand[16], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM);
  383. cpuid(0x80000004, 0, words);
  384. memcpy(&brand[32], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM);
  385. brand[BRAND_SIZE - 1] = '\0';
  386. ci->cpu_brand = brand;
  387. /* we cannot use CPUID(0xb) because it counts even disabled-by-BIOS cores (e.g. HT cores);
  388. * instead we extract info on number of online CPUs by parsing sysfs pseudo-files */
  389. int cores = get_cpu_count();
  390. if (cores < 0) {
  391. free(vendor_id);
  392. free(brand);
  393. return cores;
  394. }
  395. ci->cpu_num = cores;
  396. cpuid(1, 0, words);
  397. ci->cpu_family = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 8, 12);
  398. ci->cpu_model = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 4, 8);
  399. ci->cpu_stepping = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 0, 4);
  400. if (!memcmp(vendor_id, "GenuineIntel", 12) ||
  401. !memcmp(vendor_id, "AuthenticAMD", 12)) {
  402. ci->cpu_family += BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 20, 28);
  403. ci->cpu_model += BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 16, 20) << 4;
  404. }
  405. int flen = 0, fmax = 80;
  406. char * flags = malloc(fmax);
  407. for (int i = 0 ; i < 32 ; i++) {
  408. if (!cpu_flags[i])
  409. continue;
  410. if (BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EDX], i, i + 1)) {
  411. int len = strlen(cpu_flags[i]);
  412. if (flen + len + 1 > fmax) {
  413. char * new_flags = malloc(fmax * 2);
  414. memcpy(new_flags, flags, flen);
  415. free(flags);
  416. fmax *= 2;
  417. flags = new_flags;
  418. }
  419. memcpy(flags + flen, cpu_flags[i], len);
  420. flen += len;
  421. flags[flen++] = ' ';
  422. }
  423. }
  424. flags[flen ? flen - 1 : 0] = 0;
  425. ci->cpu_flags = flags;
  426. return rv;
  427. }