Parcourir la source

[LibOS] Add error handling to helper threads

borysp il y a 4 ans
Parent
commit
6a6a852fb6

+ 45 - 2
LibOS/shim/src/ipc/shim_ipc_helper.c

@@ -194,10 +194,10 @@ int init_ipc_helper(void) {
      * locking mechanisms if not done already since we are going in multi-threaded mode) */
     enable_locking();
     lock(&ipc_helper_lock);
-    create_ipc_helper();
+    int ret = create_ipc_helper();
     unlock(&ipc_helper_lock);
 
-    return 0;
+    return ret;
 }
 
 static struct shim_ipc_port* __create_ipc_port(PAL_HANDLE hdl) {
@@ -448,6 +448,11 @@ int broadcast_ipc(struct shim_ipc_msg* msg, int target_type, struct shim_ipc_por
         size_t cnt = 0;
         struct shim_ipc_port** target_ports_heap =
             malloc(sizeof(struct shim_ipc_port*) * target_ports_cnt);
+        if (!target_ports_heap) {
+            unlock(&ipc_helper_lock);
+            debug("Allocation of target_ports_heap failed\n");
+            return -ENOMEM;
+        }
 
         LISTP_FOR_EACH_ENTRY(port, &port_list, list) {
             if (port == exclude_port)
@@ -532,6 +537,9 @@ static int receive_ipc_message(struct shim_ipc_port* port) {
     size_t bufsize   = IPC_MSG_MINIMAL_SIZE + readahead;
 
     struct shim_ipc_msg* msg = malloc(bufsize);
+    if (!msg) {
+        return -ENOMEM;
+    }
     size_t expected_size     = IPC_MSG_MINIMAL_SIZE;
     size_t bytes             = 0;
 
@@ -542,6 +550,10 @@ static int receive_ipc_message(struct shim_ipc_port* port) {
                 while (expected_size + readahead > bufsize)
                     bufsize *= 2;
                 void* tmp_buf = malloc(bufsize);
+                if (!tmp_buf) {
+                    ret = -ENOMEM;
+                    goto out;
+                }
                 memcpy(tmp_buf, msg, bytes);
                 free(msg);
                 msg = tmp_buf;
@@ -640,10 +652,22 @@ noreturn static void shim_ipc_helper(void* dummy) {
     size_t ports_cnt = 0;
     size_t ports_max_cnt = 32;
     struct shim_ipc_port** ports = malloc(sizeof(*ports) * ports_max_cnt);
+    if (!ports) {
+        debug("shim_ipc_helper: allocation of ports failed\n");
+        goto out_err;
+    }
     PAL_HANDLE* pals = malloc(sizeof(*pals) * (1 + ports_max_cnt));
+    if (!pals) {
+        debug("shim_ipc_helper: allocation of pals failed\n");
+        goto out_err;
+    }
 
     /* allocate one memory region to hold two PAL_FLG arrays: events and revents */
     PAL_FLG* pal_events = malloc(sizeof(*pal_events) * (1 + ports_max_cnt) * 2);
+    if (!pal_events) {
+        debug("shim_ipc_helper: allocation of pal_events failed\n");
+        goto out_err;
+    }
     PAL_FLG* ret_events = pal_events + 1 + ports_max_cnt;
 
     PAL_HANDLE install_new_event_pal = event_handle(&install_new_event);
@@ -670,8 +694,20 @@ noreturn static void shim_ipc_helper(void* dummy) {
             if (ports_cnt == ports_max_cnt) {
                 /* grow `ports` and `pals` to accommodate more objects */
                 struct shim_ipc_port** tmp_ports = malloc(sizeof(*tmp_ports) * ports_max_cnt * 2);
+                if (!tmp_ports) {
+                    debug("shim_ipc_helper: allocation of tmp_ports failed\n");
+                    goto out_err_unlock;
+                }
                 PAL_HANDLE* tmp_pals    = malloc(sizeof(*tmp_pals) * (1 + ports_max_cnt * 2));
+                if (!tmp_pals) {
+                    debug("shim_ipc_helper: allocation of tmp_pals failed\n");
+                    goto out_err_unlock;
+                }
                 PAL_FLG* tmp_pal_events = malloc(sizeof(*tmp_pal_events) * (2 + ports_max_cnt * 4));
+                if (!tmp_pal_events) {
+                    debug("shim_ipc_helper: allocation of tmp_pal_events failed\n");
+                    goto out_err_unlock;
+                }
                 PAL_FLG* tmp_ret_events = tmp_pal_events + 1 + ports_max_cnt * 2;
 
                 memcpy(tmp_ports, ports, sizeof(*tmp_ports) * ports_max_cnt);
@@ -771,6 +807,13 @@ noreturn static void shim_ipc_helper(void* dummy) {
     debug("IPC helper thread terminated\n");
 
     DkThreadExit(/*clear_child_tid=*/NULL);
+
+out_err_unlock:
+    unlock(&ipc_helper_lock);
+out_err:
+    debug("Terminating the process due to a fatal error in ipc helper\n");
+    put_thread(self);
+    DkProcessExit(1);
 }
 
 static void shim_ipc_helper_prepare(void* arg) {

+ 43 - 3
LibOS/shim/src/shim_async.c

@@ -64,17 +64,25 @@ static int create_async_helper(void);
  *   - async IO events set object = handle and time = 0.
  *
  * Function returns remaining usecs for alarm/timer events (same as alarm())
- * or 0 for async IO events. On error, it returns -1.
+ * or 0 for async IO events. On error, it returns a negated error code.
  */
 int64_t install_async_event(PAL_HANDLE object, uint64_t time,
                             void (*callback)(IDTYPE caller, void* arg), void* arg) {
     /* if event happens on object, time must be zero */
     assert(!object || (object && !time));
 
-    uint64_t now                  = DkSystemTimeQuery();
+    uint64_t now = DkSystemTimeQuery();
+    if ((int64_t)now < 0) {
+        return (int64_t)now;
+    }
+
     uint64_t max_prev_expire_time = now;
 
     struct async_event* event = malloc(sizeof(struct async_event));
+    if (!event) {
+        return -ENOMEM;
+    }
+
     event->callback           = callback;
     event->arg                = arg;
     event->caller             = get_cur_tid();
@@ -171,9 +179,17 @@ static void shim_async_helper(void* arg) {
     /* init `pals` so that it always contains at least install_new_event */
     size_t pals_max_cnt = 32;
     PAL_HANDLE* pals = malloc(sizeof(*pals) * (1 + pals_max_cnt));
+    if (!pals) {
+        debug("Allocation of pals failed\n");
+        goto out_err;
+    }
 
     /* allocate one memory region to hold two PAL_FLG arrays: events and revents */
     PAL_FLG* pal_events = malloc(sizeof(*pal_events) * (1 + pals_max_cnt) * 2);
+    if (!pal_events) {
+        debug("Allocation of pal_events failed\n");
+        goto out_err;
+    }
     PAL_FLG* ret_events = pal_events + 1 + pals_max_cnt;
 
     PAL_HANDLE install_new_event_pal = event_handle(&install_new_event);
@@ -183,6 +199,10 @@ static void shim_async_helper(void* arg) {
 
     while (true) {
         uint64_t now = DkSystemTimeQuery();
+        if ((int64_t)now < 0) {
+            debug("DkSystemTimeQuery failed with: %ld\n", (int64_t)now);
+            goto out_err;
+        }
 
         lock(&async_helper_lock);
         if (async_helper_state != HELPER_ALIVE) {
@@ -201,8 +221,16 @@ static void shim_async_helper(void* arg) {
             if (tmp->object) {
                 if (pals_cnt == pals_max_cnt) {
                     /* grow `pals` to accommodate more objects */
-                    PAL_HANDLE* tmp_pals    = malloc(sizeof(*tmp_pals) * (1 + pals_max_cnt * 2));
+                    PAL_HANDLE* tmp_pals = malloc(sizeof(*tmp_pals) * (1 + pals_max_cnt * 2));
+                    if (!tmp_pals) {
+                        debug("tmp_pals allocation failed\n");
+                        goto out_err_unlock;
+                    }
                     PAL_FLG* tmp_pal_events = malloc(sizeof(*tmp_pal_events) * (2 + pals_max_cnt * 4));
+                    if (!tmp_pal_events) {
+                        debug("tmp_pal_events allocation failed\n");
+                        goto out_err_unlock;
+                    }
                     PAL_FLG* tmp_ret_events = tmp_pal_events + 1 + pals_max_cnt * 2;
 
                     memcpy(tmp_pals, pals, sizeof(*tmp_pals) * (1 + pals_max_cnt));
@@ -257,6 +285,10 @@ static void shim_async_helper(void* arg) {
         PAL_BOL polled = DkStreamsWaitEvents(pals_cnt + 1, pals, pal_events, ret_events, sleep_time);
 
         now = DkSystemTimeQuery();
+        if ((int64_t)now < 0) {
+            debug("DkSystemTimeQuery failed with: %ld\n", (int64_t)now);
+            goto out_err;
+        }
 
         LISTP_TYPE(async_event) triggered;
         INIT_LISTP(&triggered);
@@ -320,6 +352,14 @@ static void shim_async_helper(void* arg) {
     free(pal_events);
 
     DkThreadExit(/*clear_child_tid=*/NULL);
+    return;
+
+out_err_unlock:
+    unlock(&async_helper_lock);
+out_err:
+    debug("Terminating the process due to a fatal error in async helper\n");
+    put_thread(self);
+    DkProcessExit(1);
 }
 
 /* this should be called with the async_helper_lock held */

+ 6 - 1
LibOS/shim/src/sys/shim_exit.c

@@ -140,7 +140,12 @@ noreturn void thread_or_process_exit(int error_code, int term_signal) {
     if (check_last_thread(cur_thread)) {
         /* ask Async Helper thread to cleanup this thread */
         cur_thread->clear_child_tid_pal = 1; /* any non-zero value suffices */
-        install_async_event(NULL, 0, &cleanup_thread, cur_thread);
+        int64_t ret = install_async_event(NULL, 0, &cleanup_thread, cur_thread);
+        if (ret < 0) {
+            debug("failed to set up async cleanup_thread (exiting without clear child tid),"
+                  " return code: %ld\n", ret);
+            DkThreadExit(NULL);
+        }
 
         DkThreadExit(&cur_thread->clear_child_tid_pal);
     }