Browse Source

sched: When releasing a channel, do not BUG() if absent from the pending list

The current code flow makes it that we can release a channel in a PENDING
state but not in the pending list. This happens while the channel is being
processed in the scheduler loop.

Fixes #25125

Signed-off-by: David Goulet <dgoulet@torproject.org>
David Goulet 6 years ago
parent
commit
005e228f80
2 changed files with 27 additions and 11 deletions
  1. 15 11
      src/or/scheduler.c
  2. 12 0
      src/test/test_scheduler.c

+ 15 - 11
src/or/scheduler.c

@@ -628,17 +628,21 @@ scheduler_release_channel,(channel_t *chan))
     return;
   }
 
-  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
-    if (SCHED_BUG(smartlist_pos(channels_pending, chan) == -1, chan)) {
-      log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
-                         "but it wasn't in channels_pending",
-               chan->global_identifier);
-    } else {
-      smartlist_pqueue_remove(channels_pending,
-                              scheduler_compare_channels,
-                              offsetof(channel_t, sched_heap_idx),
-                              chan);
-    }
+  /* Try to remove the channel from the pending list regardless of its
+   * scheduler state. We can release a channel in many places in the tor code
+   * so we can't rely on the channel state (PENDING) to remove it from the
+   * list.
+   *
+   * For instance, the channel can change state from OPEN to CLOSING while
+   * being handled in the scheduler loop leading to the channel being in
+   * PENDING state but not in the pending list. Furthermore, we release the
+   * channel when it changes state to close and a second time when we free it.
+   * Not ideal at all but for now that is the way it is. */
+  if (chan->sched_heap_idx != -1) {
+    smartlist_pqueue_remove(channels_pending,
+                            scheduler_compare_channels,
+                            offsetof(channel_t, sched_heap_idx),
+                            chan);
   }
 
   if (the_scheduler->on_channel_free) {

+ 12 - 0
src/test/test_scheduler.c

@@ -502,6 +502,18 @@ perform_channel_state_tests(int KISTSchedRunInterval, int sched_type)
   scheduler_touch_channel(ch1);
   tt_assert(scheduler_compare_channels_mock_ctr > old_count);
 
+  /* Release the ch2 and then do it another time to make sure it doesn't blow
+   * up and we are still in a quiescent state. */
+  scheduler_release_channel(ch2);
+  tt_int_op(ch2->scheduler_state, OP_EQ, SCHED_CHAN_IDLE);
+  tt_int_op(smartlist_len(channels_pending), OP_EQ, 1);
+  /* Cheat a bit so make the release more confused but also will tells us if
+   * the release did put the channel in the right state. */
+  ch2->scheduler_state = SCHED_CHAN_PENDING;
+  scheduler_release_channel(ch2);
+  tt_int_op(ch2->scheduler_state, OP_EQ, SCHED_CHAN_IDLE);
+  tt_int_op(smartlist_len(channels_pending), OP_EQ, 1);
+
   /* Close */
   channel_mark_for_close(ch1);
   tt_int_op(ch1->state, OP_EQ, CHANNEL_STATE_CLOSING);