/* Copyright (C) 2014 Stony Brook University This file is part of Graphene Library OS. Graphene Library OS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Graphene Library OS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * db_main.c * * This file contains the main function of the PAL loader, which loads and * processes environment, arguments and manifest. */ #include "pal_defs.h" #include "pal_linux_defs.h" #include "pal.h" #include "pal_internal.h" #include "pal_linux.h" #include "pal_debug.h" #include "pal_error.h" #include "pal_security.h" #include "api.h" #include #include #include #include #include /* At the beginning of entry point, rsp starts at argc, then argvs, envps and auxvs.
Here we store rsp to rdi, so it will not be messed up by function calls */ __asm__ (".global pal_start\n" " .type pal_start,@function\n" "pal_start:\n" " movq %rsp, %rdi\n" " andq $~15, %rsp\n" " call pal_linux_main\n"); #define RTLD_BOOTSTRAP /* pal_start is the entry point of libpal.so, which calls pal_main */ #define _ENTRY pal_start /* use objfile-gdb convention instead of .debug_gdb_scripts */ #ifdef DEBUG __asm__ (".pushsection \".debug_gdb_scripts\", \"MS\",@progbits,1\r\n" ".byte 1\r\n" ".asciz \"" PAL_FILE("host/Linux/pal-gdb.py") "\"\r\n" ".popsection\r\n"); #endif struct pal_linux_state linux_state; struct pal_sec pal_sec; static size_t g_page_size = PRESET_PAGESIZE; static int uid, gid; #if USE_VDSO_GETTIME == 1 static ElfW(Addr) sysinfo_ehdr; #endif static void pal_init_bootstrap (void * args, const char ** pal_name, int * pargc, const char *** pargv, const char *** penvp) { /* * fetch arguments and environment variables, the previous stack * pointer is in rdi (arg). The stack structure starting at rdi * will look like: * auxv[m - 1] = AT_NULL * ... * auxv[0] * envp[n - 1] = NULL * ... * envp[0] * argv[argc] = NULL * argv[argc - 1] * ... 
* argv[0] * argc * --------------------------------------- * user stack */ const char ** all_args = (const char **) args; int argc = (uintptr_t) all_args[0]; const char ** argv = &all_args[1]; const char ** envp = argv + argc + 1; /* fetch environment information from aux vectors */ const char ** e = envp; #ifdef DEBUG for (; *e ; e++) if ((*e)[0] == 'I' && (*e)[1] == 'N' && (*e)[2] == '_' && (*e)[3] == 'G' && (*e)[4] == 'D' && (*e)[5] == 'B' && (*e)[6] == '=' && (*e)[7] == '1' && !(*e)[8]) linux_state.in_gdb = true; #else for (; *e ; e++); #endif ElfW(auxv_t) *av; for (av = (ElfW(auxv_t) *) (e + 1) ; av->a_type != AT_NULL ; av++) switch (av->a_type) { case AT_PAGESZ: g_page_size = av->a_un.a_val; break; case AT_UID: case AT_EUID: uid ^= av->a_un.a_val; break; case AT_GID: case AT_EGID: gid ^= av->a_un.a_val; break; #if USE_VDSO_GETTIME == 1 case AT_SYSINFO_EHDR: sysinfo_ehdr = av->a_un.a_val; break; #endif } *pal_name = argv[0]; argv++; argc--; *pargc = argc; *pargv = argv; *penvp = envp; } unsigned long _DkGetPagesize (void) { return g_page_size; } unsigned long _DkGetAllocationAlignment (void) { return g_page_size; } void _DkGetAvailableUserAddressRange (PAL_PTR * start, PAL_PTR * end, PAL_PTR * hole_start, PAL_PTR * hole_end) { void* end_addr = (void*)ALLOC_ALIGN_DOWN_PTR(TEXT_START); void* start_addr = (void*)USER_ADDRESS_LOWEST; assert(IS_ALLOC_ALIGNED_PTR(start_addr) && IS_ALLOC_ALIGNED_PTR(end_addr)); while (1) { if (start_addr >= end_addr) INIT_FAIL(PAL_ERROR_NOMEM, "no user memory available"); void * mem = (void *) ARCH_MMAP(start_addr, pal_state.alloc_align, PROT_NONE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (!IS_ERR_P(mem)) { INLINE_SYSCALL(munmap, 2, mem, pal_state.alloc_align); if (mem == start_addr) break; } start_addr = (void *) ((unsigned long) start_addr << 1); } *end = (PAL_PTR) end_addr; *start = (PAL_PTR) start_addr; // Not used, so set it to an empty range. 
*hole_start = start_addr; *hole_end = start_addr; } PAL_NUM _DkGetProcessId (void) { return linux_state.process_id; } PAL_NUM _DkGetHostId (void) { return 0; } #include "dynamic_link.h" void setup_pal_map (struct link_map * map); #if USE_VDSO_GETTIME == 1 void setup_vdso_map (ElfW(Addr) addr); #endif static struct link_map pal_map; #ifdef __x86_64__ # include "elf-x86_64.h" #else # error "unsupported architecture" #endif void pal_linux_main (void * args) { const char * pal_name = NULL; PAL_HANDLE parent = NULL, exec = NULL, manifest = NULL; const char ** argv, ** envp; int argc; PAL_HANDLE first_thread; unsigned long start_time = _DkSystemTimeQueryEarly(); /* parse argc, argv, envp and auxv */ pal_init_bootstrap(args, &pal_name, &argc, &argv, &envp); pal_map.l_addr = elf_machine_load_address(); pal_map.l_name = pal_name; elf_get_dynamic_info((void *) pal_map.l_addr + elf_machine_dynamic(), pal_map.l_info, pal_map.l_addr); ELF_DYNAMIC_RELOCATE(&pal_map); linux_state.environ = envp; init_slab_mgr(g_page_size); first_thread = malloc(HANDLE_SIZE(thread)); if (!first_thread) INIT_FAIL(PAL_ERROR_NOMEM, "Out of memory"); SET_HANDLE_TYPE(first_thread, thread); first_thread->thread.tid = INLINE_SYSCALL(gettid, 0); void * alt_stack = calloc(1, ALT_STACK_SIZE); if (!alt_stack) INIT_FAIL(PAL_ERROR_NOMEM, "Out of memory"); first_thread->thread.stack = alt_stack; // Initialize TCB at the top of the alternative stack. 
PAL_TCB_LINUX * tcb = alt_stack + ALT_STACK_SIZE - sizeof(PAL_TCB_LINUX); tcb->common.self = &tcb->common; tcb->handle = first_thread; tcb->alt_stack = alt_stack; // Stack bottom tcb->callback = NULL; tcb->param = NULL; pal_thread_init(tcb); setup_pal_map(&pal_map); #if USE_VDSO_GETTIME == 1 if (sysinfo_ehdr) setup_vdso_map(sysinfo_ehdr); #endif pal_state.start_time = start_time; init_child_process(&parent, &exec, &manifest); if (!pal_sec.process_id) pal_sec.process_id = INLINE_SYSCALL(getpid, 0); linux_state.pid = pal_sec.process_id; linux_state.uid = uid; linux_state.gid = gid; linux_state.process_id = (start_time & (~0xffff)) | linux_state.pid; if (!linux_state.parent_process_id) linux_state.parent_process_id = linux_state.process_id; if (parent) goto done_init; int fd = INLINE_SYSCALL(open, 3, argv[0], O_RDONLY|O_CLOEXEC, 0); if (IS_ERR(fd)) { // DEP 10/20/16: Don't silently swallow permission errors // accessing the manifest if (fd == -13) { printf("Warning: Attempt to open file %s failed with permission denied\n", argv[0]); } goto done_init; } size_t len = strlen(argv[0]) + 1; PAL_HANDLE file = malloc(HANDLE_SIZE(file) + len); SET_HANDLE_TYPE(file, file); HANDLE_HDR(file)->flags |= RFD(0)|WFD(0); file->file.fd = fd; file->file.map_start = NULL; char * path = (void *) file + HANDLE_SIZE(file); int ret = get_norm_path(argv[0], path, &len); if (ret < 0) { printf("Could not normalize path (%s): %s\n", argv[0], pal_strerror(ret)); goto done_init; } file->file.realpath = path; if (!check_elf_object(file)) { exec = file; goto done_init; } manifest = file; done_init: if (!parent && !exec && !manifest) { printf("Executable not found\n"); printf("USAGE: %s [executable|manifest] args ...\n", pal_name); _DkProcessExit(0); } signal_setup(); /* call to main function */ pal_main((PAL_NUM) linux_state.parent_process_id, manifest, exec, NULL, parent, first_thread, argv, envp); } /* the following code is borrowed from CPUID */ void cpuid (unsigned int leaf, unsigned int 
subleaf, unsigned int words[]) { __asm__ ("cpuid" : "=a" (words[PAL_CPUID_WORD_EAX]), "=b" (words[PAL_CPUID_WORD_EBX]), "=c" (words[PAL_CPUID_WORD_ECX]), "=d" (words[PAL_CPUID_WORD_EDX]) : "a" (leaf), "c" (subleaf)); } #define FOUR_CHARS_VALUE(s, w) \ (s)[0] = (w) & 0xff; \ (s)[1] = ((w) >> 8) & 0xff; \ (s)[2] = ((w) >> 16) & 0xff; \ (s)[3] = ((w) >> 24) & 0xff; #define BPI 32 #define POWER2(power) \ (1ULL << (power)) #define RIGHTMASK(width) \ (((unsigned long)(width) >= BPI) ? ~0ULL : POWER2(width) - 1ULL) #define BIT_EXTRACT_LE(value, start, after) \ (((unsigned long)(value) & RIGHTMASK(after)) >> start) static char * cpu_flags[] = { "fpu", // "x87 FPU on chip" "vme", // "virtual-8086 mode enhancement" "de", // "debugging extensions" "pse", // "page size extensions" "tsc", // "time stamp counter" "msr", // "RDMSR and WRMSR support" "pae", // "physical address extensions" "mce", // "machine check exception" "cx8", // "CMPXCHG8B inst." "apic", // "APIC on chip" NULL, "sep", // "SYSENTER and SYSEXIT" "mtrr", // "memory type range registers" "pge", // "PTE global bit" "mca", // "machine check architecture" "cmov", // "conditional move/compare instruction" "pat", // "page attribute table" "pse36", // "page size extension" "pn", // "processor serial number" "clflush", // "CLFLUSH instruction" NULL, "dts", // "debug store" "acpi", // "Onboard thermal control" "mmx", // "MMX Technology" "fxsr", // "FXSAVE/FXRSTOR" "sse", // "SSE extensions" "sse2", // "SSE2 extensions" "ss", // "self snoop" "ht", // "hyper-threading / multi-core supported" "tm", // "therm. monitor" "ia64", // "IA64" "pbe", // "pending break event" }; /* * Returns the number of online CPUs read from /sys/devices/system/cpu/online, -errno on failure. * Understands complex formats like "1,3-5,6". 
*/ int get_cpu_count(void) { int fd = INLINE_SYSCALL(open, 3, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC, 0); if (fd < 0) return unix_to_pal_error(ERRNO(fd)); char buf[64]; int ret = INLINE_SYSCALL(read, 3, fd, buf, sizeof(buf) - 1); INLINE_SYSCALL(close, 1, fd); if (ret < 0) { return unix_to_pal_error(ERRNO(ret)); } buf[ret] = '\0'; /* ensure null-terminated buf even in partial read */ char* end; char* ptr = buf; int cpu_count = 0; while (*ptr) { while (*ptr == ' ' || *ptr == '\t' || *ptr == ',') ptr++; int firstint = (int)strtol(ptr, &end, 10); if (ptr == end) break; if (*end == '\0' || *end == ',') { /* single CPU index, count as one more CPU */ cpu_count++; } else if (*end == '-') { /* CPU range, count how many CPUs in range */ ptr = end + 1; int secondint = (int)strtol(ptr, &end, 10); if (secondint > firstint) cpu_count += secondint - firstint + 1; // inclusive (e.g., 0-7, or 8-16) } ptr = end; } if (cpu_count == 0) return -PAL_ERROR_STREAMNOTEXIST; return cpu_count; } int _DkGetCPUInfo (PAL_CPU_INFO * ci) { unsigned int words[PAL_CPUID_WORD_NUM]; int rv = 0; const size_t VENDOR_ID_SIZE = 13; char* vendor_id = malloc(VENDOR_ID_SIZE); cpuid(0, 0, words); FOUR_CHARS_VALUE(&vendor_id[0], words[PAL_CPUID_WORD_EBX]); FOUR_CHARS_VALUE(&vendor_id[4], words[PAL_CPUID_WORD_EDX]); FOUR_CHARS_VALUE(&vendor_id[8], words[PAL_CPUID_WORD_ECX]); vendor_id[VENDOR_ID_SIZE - 1] = '\0'; ci->cpu_vendor = vendor_id; const size_t BRAND_SIZE = 49; char* brand = malloc(BRAND_SIZE); cpuid(0x80000002, 0, words); memcpy(&brand[ 0], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM); cpuid(0x80000003, 0, words); memcpy(&brand[16], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM); cpuid(0x80000004, 0, words); memcpy(&brand[32], words, sizeof(unsigned int) * PAL_CPUID_WORD_NUM); brand[BRAND_SIZE - 1] = '\0'; ci->cpu_brand = brand; /* we cannot use CPUID(0xb) because it counts even disabled-by-BIOS cores (e.g. 
HT cores); * instead we extract info on number of online CPUs by parsing sysfs pseudo-files */ int cores = get_cpu_count(); if (cores < 0) { free(vendor_id); free(brand); return cores; } ci->cpu_num = cores; cpuid(1, 0, words); ci->cpu_family = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 8, 12); ci->cpu_model = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 4, 8); ci->cpu_stepping = BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 0, 4); if (!memcmp(vendor_id, "GenuineIntel", 12) || !memcmp(vendor_id, "AuthenticAMD", 12)) { ci->cpu_family += BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 20, 28); ci->cpu_model += BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EAX], 16, 20) << 4; } int flen = 0, fmax = 80; char * flags = malloc(fmax); for (int i = 0 ; i < 32 ; i++) { if (!cpu_flags[i]) continue; if (BIT_EXTRACT_LE(words[PAL_CPUID_WORD_EDX], i, i + 1)) { int len = strlen(cpu_flags[i]); if (flen + len + 1 > fmax) { char * new_flags = malloc(fmax * 2); memcpy(new_flags, flags, flen); free(flags); fmax *= 2; flags = new_flags; } memcpy(flags + flen, cpu_flags[i], len); flen += len; flags[flen++] = ' '; } } flags[flen ? flen - 1 : 0] = 0; ci->cpu_flags = flags; return rv; }