Procházet zdrojové kódy

[LibOS] Add dummy implementations of Linux scheduling APIs

This commit adds dummy implementations for setpriority, getpriority,
sched_setparam, sched_getparam, sched_setscheduler, sched_getscheduler,
sched_get_priority_max, sched_get_priority_min, sched_rr_get_interval,
sched_setaffinity, sched_getaffinity. These implementations only check
for incorrect user-supplied arguments and either do nothing (setters)
or return default values (getters). This commit also adds a simple LibOS
regression test.
Dmitrii Kuvaiskii před 4 roky
rodič
revize
376de63d5a

+ 10 - 0
LibOS/shim/include/shim_table.h

@@ -434,6 +434,15 @@ int shim_do_getpgid(pid_t pid);
 int shim_do_getsid(pid_t pid);
 int shim_do_sigpending(__sigset_t* set, size_t sigsetsize);
 int shim_do_sigaltstack(const stack_t* ss, stack_t* oss);
+int shim_do_setpriority(int which, int who, int niceval);
+int shim_do_getpriority(int which, int who);
+int shim_do_sched_setparam(pid_t pid, struct __kernel_sched_param* param);
+int shim_do_sched_getparam(pid_t pid, struct __kernel_sched_param* param);
+int shim_do_sched_setscheduler(pid_t pid, int policy, struct __kernel_sched_param* param);
+int shim_do_sched_getscheduler(pid_t pid);
+int shim_do_sched_get_priority_max(int policy);
+int shim_do_sched_get_priority_min(int policy);
+int shim_do_sched_rr_get_interval(pid_t pid, struct timespec* interval);
 int shim_do_sigsuspend(const __sigset_t* mask);
 void* shim_do_arch_prctl(int code, void* addr);
 int shim_do_setrlimit(int resource, struct __kernel_rlimit* rlim);
@@ -442,6 +451,7 @@ pid_t shim_do_gettid(void);
 int shim_do_tkill(int pid, int sig);
 time_t shim_do_time(time_t* tloc);
 int shim_do_futex(int* uaddr, int op, int val, void* utime, int* uaddr2, int val3);
+int shim_do_sched_setaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_mask_ptr);
 int shim_do_sched_getaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_mask_ptr);
 int shim_do_set_tid_address(int* tidptr);
 int shim_do_semtimedop(int semid, struct sembuf* sops, unsigned int nsops,

+ 15 - 12
LibOS/shim/src/shim_syscalls.c

@@ -537,24 +537,27 @@ SHIM_SYSCALL_PASSTHROUGH(fstatfs, 2, int, int, fd, struct statfs*, buf)
 
 SHIM_SYSCALL_PASSTHROUGH(sysfs, 3, int, int, option, unsigned long, arg1, unsigned long, arg2)
 
-SHIM_SYSCALL_PASSTHROUGH(getpriority, 2, int, int, which, int, who)
+DEFINE_SHIM_SYSCALL(setpriority, 3, shim_do_setpriority, int, int, which, int, who, int, niceval)
 
-SHIM_SYSCALL_PASSTHROUGH(setpriority, 3, int, int, which, int, who, int, niceval)
+DEFINE_SHIM_SYSCALL(getpriority, 2, shim_do_getpriority, int, int, which, int, who)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_setparam, 2, int, pid_t, pid, struct __kernel_sched_param*, param)
+DEFINE_SHIM_SYSCALL(sched_setparam, 2, shim_do_sched_setparam, int, pid_t, pid,
+                    struct __kernel_sched_param*, param)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_getparam, 2, int, pid_t, pid, struct __kernel_sched_param*, param)
+DEFINE_SHIM_SYSCALL(sched_getparam, 2, shim_do_sched_getparam, int, pid_t, pid,
+                    struct __kernel_sched_param*, param)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_setscheduler, 3, int, pid_t, pid, int, policy,
-                         struct __kernel_sched_param*, param)
+DEFINE_SHIM_SYSCALL(sched_setscheduler, 3, shim_do_sched_setscheduler, int, pid_t, pid,
+                    int, policy, struct __kernel_sched_param*, param)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_getscheduler, 1, int, pid_t, pid)
+DEFINE_SHIM_SYSCALL(sched_getscheduler, 1, shim_do_sched_getscheduler, int, pid_t, pid)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_get_priority_max, 1, int, int, policy)
+DEFINE_SHIM_SYSCALL(sched_get_priority_max, 1, shim_do_sched_get_priority_max, int, int, policy)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_get_priority_min, 1, int, int, policy)
+DEFINE_SHIM_SYSCALL(sched_get_priority_min, 1, shim_do_sched_get_priority_min, int, int, policy)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_rr_get_interval, 2, int, pid_t, pid, struct timespec*, interval)
+DEFINE_SHIM_SYSCALL(sched_rr_get_interval, 2, shim_do_sched_rr_get_interval, int, pid_t, pid,
+                    struct timespec*, interval)
 
 SHIM_SYSCALL_PASSTHROUGH(mlock, 2, int, void*, start, size_t, len)
 
@@ -715,8 +718,8 @@ DEFINE_SHIM_SYSCALL(time, 1, shim_do_time, time_t, time_t*, tloc)
 DEFINE_SHIM_SYSCALL(futex, 6, shim_do_futex, int, int*, uaddr, int, op, int, val, void*, utime,
                     int*, uaddr2, int, val3)
 
-SHIM_SYSCALL_PASSTHROUGH(sched_setaffinity, 3, int, pid_t, pid, size_t, len, __kernel_cpu_set_t*,
-                         user_mask_ptr)
+DEFINE_SHIM_SYSCALL(sched_setaffinity, 3, shim_do_sched_setaffinity, int, pid_t, pid, size_t, len,
+                    __kernel_cpu_set_t*, user_mask_ptr)
 
 DEFINE_SHIM_SYSCALL(sched_getaffinity, 3, shim_do_sched_getaffinity, int, pid_t, pid, size_t, len,
                     __kernel_cpu_set_t*, user_mask_ptr)

+ 145 - 5
LibOS/shim/src/sys/shim_sched.c

@@ -17,11 +17,16 @@
 /*
  * shim_sched.c
  *
- * Implementation of system call "sched_yield".
+ * Implementation of system calls "sched_yield", "setpriority", "getpriority",
+ * "sched_setparam", "sched_getparam", "sched_setscheduler", "sched_getscheduler",
+ * "sched_get_priority_max", "sched_get_priority_min", "sched_rr_get_interval",
+ * "sched_setaffinity", "sched_getaffinity".
  */
 
 #include <api.h>
 #include <errno.h>
+#include <linux/resource.h>
+#include <linux/sched.h>
 #include <pal.h>
 #include <shim_internal.h>
 #include <shim_table.h>
@@ -31,12 +36,123 @@ int shim_do_sched_yield(void) {
     return 0;
 }
 
-int shim_do_sched_getaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_mask_ptr) {
-    __UNUSED(pid);
-    int ncpus = PAL_CB(cpu_info.cpu_num);
+/* dummy implementation: ignore user-supplied niceval and return success.
+ *
+ * The setpriority() syscall receives the nice value exactly as user space supplied it
+ * (nominally in [-20, 19]); Linux silently clamps out-of-range values instead of failing.
+ * Only the return value of the getpriority() syscall uses the translated [1, 40] encoding.
+ * Because niceval is ignored here, every value is accepted. Privilege checks for lowering
+ * the nice value are not modelled.
+ *
+ * Returns 0 on success, -EINVAL if `which` is not one of PRIO_PROCESS/PRIO_PGRP/PRIO_USER. */
+int shim_do_setpriority(int which, int who, int niceval) {
+    __UNUSED(who);
+    __UNUSED(niceval);
+
+    if (which != PRIO_PROCESS && which != PRIO_PGRP && which != PRIO_USER)
+        return -EINVAL;
+
+    return 0;
+}
+
+/* dummy implementation: always report the default nice value.
+ *
+ * The getpriority() syscall encodes its result as (20 - nice), so the constant 20 returned
+ * here corresponds to a user-visible nice value of 0.
+ *
+ * Returns 20 on success, -EINVAL if `which` is not one of PRIO_PROCESS/PRIO_PGRP/PRIO_USER. */
+int shim_do_getpriority(int which, int who) {
+    __UNUSED(who);
+
+    switch (which) {
+        case PRIO_PROCESS:
+        case PRIO_PGRP:
+        case PRIO_USER:
+            return 20; /* default nice value on Linux */
+        default:
+            return -EINVAL;
+    }
+}
+
+/* dummy implementation: ignore user-supplied param and return success.
+ *
+ * Returns 0 on success, -EINVAL for a negative pid or NULL param, and -EFAULT if param
+ * does not point to accessible user memory. */
+int shim_do_sched_setparam(pid_t pid, struct __kernel_sched_param* param) {
+    if (pid < 0 || param == NULL)
+        return -EINVAL;
+
+    /* param is only read by this syscall, so it merely has to be readable */
+    if (test_user_memory(param, sizeof(*param), false))
+        return -EFAULT;
+
+    return 0;
+}
+
+/* dummy implementation: always return sched_priority of 0 (implies non-real-time sched policy).
+ *
+ * Returns 0 on success, -EINVAL for a negative pid or NULL param, and -EFAULT if param
+ * does not point to writable user memory. */
+int shim_do_sched_getparam(pid_t pid, struct __kernel_sched_param* param) {
+    if (pid < 0 || param == NULL)
+        return -EINVAL;
+
+    /* param comes straight from the application: verify it before writing through it */
+    if (test_user_memory(param, sizeof(*param), true))
+        return -EFAULT;
+
+    param->__sched_priority = 0;
+    return 0;
+}
+
+/* dummy implementation: validate user-supplied policy & param, then ignore them and return
+ * success.
+ *
+ * Returns 0 on success; -EINVAL for a negative pid, NULL param, unrecognized policy, or a
+ * priority that does not fit the policy; -EFAULT if param is not accessible user memory. */
+int shim_do_sched_setscheduler(pid_t pid, int policy, struct __kernel_sched_param* param) {
+    policy &= ~SCHED_RESET_ON_FORK; /* ignore reset-on-fork flag */
+
+    if (pid < 0 || param == NULL)
+        return -EINVAL;
+
+    /* param->__sched_priority is read below, so the pointer must be readable user memory */
+    if (test_user_memory(param, sizeof(*param), false))
+        return -EFAULT;
+
+    /* fail on unrecognized policies */
+    if (policy != SCHED_NORMAL && policy != SCHED_BATCH && policy != SCHED_IDLE && /* non-real-time */
+        policy != SCHED_FIFO && policy != SCHED_RR /* real-time */)
+        return -EINVAL;
+
+    /* non-real-time policies must have priority of 0 */
+    if ((policy == SCHED_NORMAL || policy == SCHED_BATCH || policy == SCHED_IDLE) &&
+        (param->__sched_priority != 0))
+        return -EINVAL;
+
+    /* real-time policies must have priority in range [1, 99] */
+    if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+        (param->__sched_priority < 1 || param->__sched_priority > 99))
+        return -EINVAL;
+
+    return 0;
+}
+
+/* dummy implementation: every valid pid is reported as running under SCHED_NORMAL
+ * (default round-robin time-sharing policy); a negative pid yields -EINVAL */
+int shim_do_sched_getscheduler(pid_t pid) {
+    return pid < 0 ? -EINVAL : SCHED_NORMAL;
+}
+
+/* Returns the highest static priority allowed for `policy`: 99 for the real-time policies,
+ * 0 for the non-real-time ones, -EINVAL for anything unrecognized. */
+int shim_do_sched_get_priority_max(int policy) {
+    switch (policy) {
+        case SCHED_FIFO:
+        case SCHED_RR:
+            /* real-time policies have max priority of 99 */
+            return 99;
+        case SCHED_NORMAL:
+        case SCHED_BATCH:
+        case SCHED_IDLE:
+            /* non-real-time policies have max priority of 0 */
+            return 0;
+        default:
+            /* fail on unrecognized policies */
+            return -EINVAL;
+    }
+}
+
+/* Returns the lowest static priority allowed for `policy`: 1 for the real-time policies,
+ * 0 for the non-real-time ones, -EINVAL for anything unrecognized. */
+int shim_do_sched_get_priority_min(int policy) {
+    switch (policy) {
+        case SCHED_FIFO:
+        case SCHED_RR:
+            /* real-time policies have min priority of 1 */
+            return 1;
+        case SCHED_NORMAL:
+        case SCHED_BATCH:
+        case SCHED_IDLE:
+            /* non-real-time policies have min priority of 0 */
+            return 0;
+        default:
+            /* fail on unrecognized policies */
+            return -EINVAL;
+    }
+}
+
+/* dummy implementation: the reported round-robin quantum is always 100 ms (default in Linux).
+ * Returns 0 on success, -EINVAL for a negative pid, -EFAULT if `interval` fails the
+ * user-memory check. */
+int shim_do_sched_rr_get_interval(pid_t pid, struct timespec* interval) {
+    if (pid < 0)
+        return -EINVAL;
+
+    bool bad_ptr = test_user_memory(interval, sizeof(*interval), true);
+    if (bad_ptr)
+        return -EFAULT;
 
+    /* 0 s + 100,000,000 ns == default value of 100 ms in Linux */
+    interval->tv_nsec = 100000000;
+    interval->tv_sec  = 0;
+    return 0;
+}
+
+static int check_affinity_params(int ncpus, size_t len, __kernel_cpu_set_t* user_mask_ptr) {
     /* Check that user_mask_ptr is valid; if not, should return -EFAULT */
-    if (test_user_memory(user_mask_ptr, len, 1))
+    if (test_user_memory(user_mask_ptr, len, true))
         return -EFAULT;
 
     /* Linux kernel bitmap is based on long. So according to its
@@ -49,6 +165,30 @@ int shim_do_sched_getaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_ma
     if (len & (sizeof(long) - 1))
         return -EINVAL;
 
+    return bitmask_size_in_bytes;
+}
+
+/* dummy implementation: the arguments are validated by check_affinity_params() and the mask
+ * is then discarded. Returns 0 on success or the negative value reported by the check. */
+int shim_do_sched_setaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_mask_ptr) {
+    __UNUSED(pid);
+
+    /* only the validation outcome matters; a non-negative result is collapsed to 0 */
+    int ret = check_affinity_params(PAL_CB(cpu_info.cpu_num), len, user_mask_ptr);
+    return ret < 0 ? ret : 0;
+}
+
+/* dummy implementation: always return all-ones (as many as there are host CPUs)  */
+int shim_do_sched_getaffinity(pid_t pid, size_t len, __kernel_cpu_set_t* user_mask_ptr) {
+    __UNUSED(pid);
+    int ncpus = PAL_CB(cpu_info.cpu_num);
+
+    int bitmask_size_in_bytes = check_affinity_params(ncpus, len, user_mask_ptr);
+    if (bitmask_size_in_bytes < 0)
+        return bitmask_size_in_bytes;
+
     memset(user_mask_ptr, 0, len);
     for (int i = 0; i < ncpus; i++) {
         ((uint8_t*)user_mask_ptr)[i / 8] |= 1 << (i % 8);

+ 77 - 0
LibOS/shim/test/regression/sched.c

@@ -0,0 +1,77 @@
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+
+/* This test checks that our dummy implementations work correctly. None of the
+ * below syscalls are actually propagated to the host OS or change anything.
+ * NOTE: This test works correctly only on Graphene (not on Linux). */
+
+/* Exercises each scheduling syscall once. Exit status: 0 on success, 1 if a setter fails,
+ * 2 if a getter fails or returns an unexpected value. Failed calls are reported with
+ * perror() (errno is meaningful); unexpected values are reported with fprintf() because
+ * errno is not set in that case. */
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+
+    /* setters */
+    struct sched_param param = { .sched_priority = 50 };
+    if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
+        perror("Error setting scheduler");
+        return 1;
+    }
+
+    if (sched_setparam(0, &param) == -1) {
+        perror("Error setting param");
+        return 1;
+    }
+
+    if (setpriority(PRIO_PROCESS, 0, 10) == -1) {
+        perror("Error setting priority");
+        return 1;
+    }
+
+    cpu_set_t my_set;
+    CPU_ZERO(&my_set);
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &my_set) == -1) {
+        perror("Error setting affinity");
+        return 1;
+    }
+
+    /* getters */
+    int policy = sched_getscheduler(0);
+    if (policy == -1) {
+        perror("Error getting scheduler");
+        return 2;
+    }
+    if (policy != SCHED_OTHER) {
+        fprintf(stderr, "Unexpected scheduling policy: %d\n", policy);
+        return 2;
+    }
+
+    if (sched_getparam(0, &param) == -1) {
+        perror("Error getting param");
+        return 2;
+    }
+    if (param.sched_priority != 0) {
+        fprintf(stderr, "Unexpected sched_priority: %d\n", param.sched_priority);
+        return 2;
+    }
+
+    /* -1 is a legitimate nice value, so errno is the only reliable error indicator */
+    errno = 0;
+    int nice_value = getpriority(PRIO_PROCESS, 0);
+    if (nice_value == -1 && errno != 0) {
+        perror("Error getting priority");
+        return 2;
+    }
+    if (nice_value != 0) {
+        fprintf(stderr, "Unexpected nice value: %d\n", nice_value);
+        return 2;
+    }
+
+    if (sched_getaffinity(0, sizeof(cpu_set_t), &my_set) == -1) {
+        perror("Error getting affinity");
+        return 2;
+    }
+
+    int max_prio = sched_get_priority_max(SCHED_FIFO);
+    if (max_prio != 99) {
+        fprintf(stderr, "Unexpected max priority of SCHED_FIFO: %d\n", max_prio);
+        return 2;
+    }
+
+    int min_prio = sched_get_priority_min(SCHED_FIFO);
+    if (min_prio != 1) {
+        fprintf(stderr, "Unexpected min priority of SCHED_FIFO: %d\n", min_prio);
+        return 2;
+    }
+
+    struct timespec interval = { 0 };
+    if (sched_rr_get_interval(0, &interval) == -1) {
+        perror("Error getting interval of SCHED_RR");
+        return 2;
+    }
+    if (interval.tv_sec != 0 || interval.tv_nsec != 100000000) {
+        fprintf(stderr, "Unexpected interval of SCHED_RR: %ld s %ld ns\n",
+                (long)interval.tv_sec, (long)interval.tv_nsec);
+        return 2;
+    }
+
+    puts("Test completed successfully");
+    return 0;
+}

+ 6 - 0
LibOS/shim/test/regression/test_libos.py

@@ -324,6 +324,12 @@ class TC_30_Syscall(RegressionTestCase):
         self.assertIn('eventfd_using_poll completed successfully', stdout)
         self.assertIn('eventfd_using_various_flags completed successfully', stdout)
 
+    def test_080_sched(self):
+        stdout, stderr = self.run_binary(['sched'])
+
+        # Scheduling Syscalls Test
+        self.assertIn('Test completed successfully', stdout)
+
 @unittest.skipUnless(HAS_SGX,
     'This test is only meaningful on SGX PAL because only SGX catches raw '
     'syscalls and redirects to Graphene\'s LibOS. If we will add seccomp to '