/* Copyright (c) 2013-2017, The Tor Project, Inc. */
/* See LICENSE for licensing information */

#include "or.h"
#include "config.h"

#include "compat_libevent.h"
#define SCHEDULER_PRIVATE_
#define SCHEDULER_KIST_PRIVATE
#include "scheduler.h"
#include "main.h"

/* struct event, event_add(), event_del(), event_active(), evutil_socket_t */
#include <event2/event.h>

/**
 * \file scheduler.c
 * \brief Channel scheduling system: decides which channels should send and
 * receive when.
 *
 * This module is the global/common parts of the scheduling system. This system
 * is what decides what channels get to send cells on their circuits and when.
 *
 * Terms:
 * - "Scheduling system": the collection of scheduler*.{h,c} files and their
 *   aggregate behavior.
 * - "Scheduler implementation": a scheduler_t. The scheduling system has one
 *   active scheduling implementation at a time.
 *
 * In this file you will find state that any scheduler implementation can have
 * access to as well as the functions the rest of Tor uses to interact with the
 * scheduling system.
 *
 * The earliest versions of Tor approximated a kind of round-robin system
 * among active connections, but only approximated it. It would only consider
 * one connection (roughly equal to a channel in today's terms) at a time, and
 * thus could only prioritize circuits against others on the same connection.
 *
 * Then in response to the KIST paper[0], Tor implemented a global
 * circuit scheduler. It was supposed to prioritize circuits across many
 * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
 *
 * [0]: http://www.robgjansen.com/publications/kist-sec2014.pdf
 *
 * Then we actually got around to implementing KIST for real. We decided to
 * modularize the scheduler so new ones can be implemented. You can find KIST
 * in scheduler_kist.c.
 *
 * Channels have one of four scheduling states based on whether or not they
 * have cells to send and whether or not they are able to send.
 *
 * <ol>
 * <li>
 *   Not open for writes, no cells to send.
 *   <ul>
 *   <li> Not much to do here, and the channel will have scheduler_state
 *        == SCHED_CHAN_IDLE
 *   </li>
 *   <li> Transitions from:
 *     <ul>
 *     <li> Open for writes/has cells by simultaneously draining all circuit
 *          queues and filling the output buffer.
 *     </li>
 *     </ul>
 *   </li>
 *   <li> Transitions to:
 *     <ul>
 *     <li> Not open for writes/has cells by arrival of cells on an attached
 *          circuit (this would be driven from append_cell_to_circuit_queue())
 *     </li>
 *     <li> Open for writes/no cells by a channel type specific path;
 *          driven from connection_or_flushed_some() for channel_tls_t.
 *     </li>
 *     </ul>
 *   </li>
 *   </ul>
 * </li>
 *
 * <li>
 *   Open for writes, no cells to send
 *   <ul>
 *   <li> Not much here either; this will be the state an idle but open
 *        channel can be expected to settle in. It will have scheduler_state
 *        == SCHED_CHAN_WAITING_FOR_CELLS
 *   </li>
 *   <li> Transitions from:
 *     <ul>
 *     <li> Not open for writes/no cells by flushing some of the output
 *          buffer.
 *     </li>
 *     <li> Open for writes/has cells by the scheduler moving cells from
 *          circuit queues to channel output queue, but not having enough
 *          to fill the output queue.
 *     </li>
 *     </ul>
 *   </li>
 *   <li> Transitions to:
 *     <ul>
 *     <li> Open for writes/has cells by arrival of new cells on an attached
 *          circuit, in append_cell_to_circuit_queue()
 *     </li>
 *     </ul>
 *   </li>
 *   </ul>
 * </li>
 *
 * <li>
 *   Not open for writes, cells to send
 *   <ul>
 *   <li> This is the state of a busy circuit limited by output bandwidth;
 *        cells have piled up in the circuit queues waiting to be relayed.
 *        The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
 *   </li>
 *   <li> Transitions from:
 *     <ul>
 *     <li> Not open for writes/no cells by arrival of cells on an attached
 *          circuit
 *     </li>
 *     <li> Open for writes/has cells by filling an output buffer without
 *          draining all cells from attached circuits
 *     </li>
 *     </ul>
 *   </li>
 *   <li> Transitions to:
 *     <ul>
 *     <li> Open for writes/has cells by draining some of the output buffer
 *          via the connection_or_flushed_some() path (for channel_tls_t).
 *     </li>
 *     </ul>
 *   </li>
 *   </ul>
 * </li>
 *
 * <li>
 *   Open for writes, cells to send
 *   <ul>
 *   <li> This connection is ready to relay some cells and waiting for
 *        the scheduler to choose it. The channel will have scheduler_state ==
 *        SCHED_CHAN_PENDING.
 *   </li>
 *   <li> Transitions from:
 *     <ul>
 *     <li> Not open for writes/has cells by the connection_or_flushed_some()
 *          path
 *     </li>
 *     <li> Open for writes/no cells by the append_cell_to_circuit_queue()
 *          path
 *     </li>
 *     </ul>
 *   </li>
 *   <li> Transitions to:
 *     <ul>
 *     <li> Not open for writes/no cells by draining all circuit queues and
 *          simultaneously filling the output buffer.
 *     </li>
 *     <li> Not open for writes/has cells by writing enough cells to fill the
 *          output buffer
 *     </li>
 *     <li> Open for writes/no cells by draining all attached circuit queues
 *          without also filling the output buffer
 *     </li>
 *     </ul>
 *   </li>
 *   </ul>
 * </li>
 * </ol>
 *
 * Other event-driven parts of the code move channels between these scheduling
 * states by calling scheduler functions. The scheduling system builds up a
 * list of channels in the SCHED_CHAN_PENDING state that the scheduler
 * implementation should then use when it runs. Scheduling implementations need
 * to properly update channel states during their scheduler_t->run() function
 * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
 * to any other state.
 *
 * The remainder of this file is a small amount of state that any scheduler
 * implementation should have access to, and the functions the rest of Tor uses
 * to interact with the scheduling system.
 */

/*****************************************************************************
 * Scheduling system state
 *
 * State that can be accessed from any scheduler implementation (but not
 * outside the scheduling system)
 *****************************************************************************/

/**
 * The currently active scheduler implementation. It is chosen by
 * select_scheduler() (via set_scheduler()) and reset to NULL by
 * scheduler_free_all().
 */
STATIC const scheduler_t *the_scheduler;

/**
 * We keep a list of channels that are pending - i.e., have cells to write
 * and can accept them to send. The enum scheduler_state in channel_t
 * is reserved for our use.
 *
 * Priority queue of channels that can write and have cells (pending work).
 * It is ordered with scheduler_compare_channels(), allocated in
 * scheduler_init() and freed in scheduler_free_all(); the channels in it are
 * not owned by the list.
 */
STATIC smartlist_t *channels_pending = NULL;

/**
 * This event runs the scheduler from its callback, and is manually
 * activated whenever a channel enters open for writes/cells to send.
 */
STATIC struct event *run_sched_ev = NULL;

/*****************************************************************************
 * Scheduling system static function definitions
 *
 * Functions that can only be accessed from this file.
 *****************************************************************************/

/** Return a human readable string for the given scheduler type.
*/ static const char * get_scheduler_type_string(scheduler_types_t type) { switch (type) { case SCHEDULER_VANILLA: return "Vanilla"; case SCHEDULER_KIST: return "KIST"; case SCHEDULER_KIST_LITE: return "KISTLite"; case SCHEDULER_NONE: /* fallthrough */ default: tor_assert_unreached(); return "(N/A)"; } } /** * Scheduler event callback; this should get triggered once per event loop * if any scheduling work was created during the event loop. */ static void scheduler_evt_callback(evutil_socket_t fd, short events, void *arg) { (void) fd; (void) events; (void) arg; log_debug(LD_SCHED, "Scheduler event callback called"); /* Run the scheduler. This is a mandatory function. */ /* We might as well assert on this. If this function doesn't exist, no cells * are getting scheduled. Things are very broken. scheduler_t says the run() * function is mandatory. */ tor_assert(the_scheduler->run); the_scheduler->run(); /* Schedule itself back in if it has more work. */ /* Again, might as well assert on this mandatory scheduler_t function. If it * doesn't exist, there's no way to tell libevent to run the scheduler again * in the future. */ tor_assert(the_scheduler->schedule); the_scheduler->schedule(); } /** Using the global options, select the scheduler we should be using. */ static void select_scheduler(void) { scheduler_t *new_scheduler = NULL; #ifdef TOR_UNIT_TESTS /* This is hella annoying to set in the options for every test that passes * through the scheduler and there are many so if we don't explicitly have * a list of types set, just put the vanilla one. */ if (get_options()->SchedulerTypes_ == NULL) { the_scheduler = get_vanilla_scheduler(); return; } #endif /* defined(TOR_UNIT_TESTS) */ /* This list is ordered that is first entry has the first priority. Thus, as * soon as we find a scheduler type that we can use, we use it and stop. 
*/ SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) { switch (*type) { case SCHEDULER_VANILLA: new_scheduler = get_vanilla_scheduler(); goto end; case SCHEDULER_KIST: if (!scheduler_can_use_kist()) { #ifdef HAVE_KIST_SUPPORT log_notice(LD_SCHED, "Scheduler type KIST has been disabled by " "the consensus or no kernel support."); #else /* !(defined(HAVE_KIST_SUPPORT)) */ log_info(LD_SCHED, "Scheduler type KIST not built in"); #endif /* defined(HAVE_KIST_SUPPORT) */ continue; } new_scheduler = get_kist_scheduler(); scheduler_kist_set_full_mode(); goto end; case SCHEDULER_KIST_LITE: new_scheduler = get_kist_scheduler(); scheduler_kist_set_lite_mode(); goto end; case SCHEDULER_NONE: /* fallthrough */ default: /* Our option validation should have caught this. */ tor_assert_unreached(); } } SMARTLIST_FOREACH_END(type); end: if (new_scheduler == NULL) { log_err(LD_SCHED, "Tor was unable to select a scheduler type. Please " "make sure Schedulers is correctly configured with " "what Tor does support."); /* We weren't able to choose a scheduler which means that none of the ones * set in Schedulers are supported or usable. We will respect the user * wishes of using what it has been configured and don't do a sneaky * fallback. Because this can be changed at runtime, we have to stop tor * right now. */ exit(1); // XXXX bad exit } /* Set the chosen scheduler. */ the_scheduler = new_scheduler; } /** * Helper function called from a few different places. It changes the * scheduler implementation, if necessary. And if it did, it then tells the * old one to free its state and the new one to initialize. */ static void set_scheduler(void) { const scheduler_t *old_scheduler = the_scheduler; scheduler_types_t old_scheduler_type = SCHEDULER_NONE; /* We keep track of the type in order to log only if the type switched. We * can't just use the scheduler pointers because KIST and KISTLite share the * same object. 
*/ if (the_scheduler) { old_scheduler_type = the_scheduler->type; } /* From the options, select the scheduler type to set. */ select_scheduler(); tor_assert(the_scheduler); /* We look at the pointer difference in case the old sched and new sched * share the same scheduler object, as is the case with KIST and KISTLite. */ if (old_scheduler != the_scheduler) { /* Allow the old scheduler to clean up, if needed. */ if (old_scheduler && old_scheduler->free_all) { old_scheduler->free_all(); } /* Initialize the new scheduler. */ if (the_scheduler->init) { the_scheduler->init(); } } /* Finally we notice log if we switched schedulers. We use the type in case * two schedulers share a scheduler object. */ if (old_scheduler_type != the_scheduler->type) { log_notice(LD_CONFIG, "Scheduler type %s has been enabled.", get_scheduler_type_string(the_scheduler->type)); } } /***************************************************************************** * Scheduling system private function definitions * * Functions that can only be accessed from scheduler*.c *****************************************************************************/ /** Return the pending channel list. */ smartlist_t * get_channels_pending(void) { return channels_pending; } /** Comparison function to use when sorting pending channels. 
*/ MOCK_IMPL(int, scheduler_compare_channels, (const void *c1_v, const void *c2_v)) { const channel_t *c1 = NULL, *c2 = NULL; /* These are a workaround for -Wbad-function-cast throwing a fit */ const circuitmux_policy_t *p1, *p2; uintptr_t p1_i, p2_i; tor_assert(c1_v); tor_assert(c2_v); c1 = (const channel_t *)(c1_v); c2 = (const channel_t *)(c2_v); if (c1 != c2) { if (circuitmux_get_policy(c1->cmux) == circuitmux_get_policy(c2->cmux)) { /* Same cmux policy, so use the mux comparison */ return circuitmux_compare_muxes(c1->cmux, c2->cmux); } else { /* * Different policies; not important to get this edge case perfect * because the current code never actually gives different channels * different cmux policies anyway. Just use this arbitrary but * definite choice. */ p1 = circuitmux_get_policy(c1->cmux); p2 = circuitmux_get_policy(c2->cmux); p1_i = (uintptr_t)p1; p2_i = (uintptr_t)p2; return (p1_i < p2_i) ? -1 : 1; } } else { /* c1 == c2, so always equal */ return 0; } } /***************************************************************************** * Scheduling system global functions * * Functions that can be accessed from anywhere in Tor. *****************************************************************************/ /** * This is how the scheduling system is notified of Tor's configuration * changing. For example: a SIGHUP was issued. */ void scheduler_conf_changed(void) { /* Let the scheduler decide what it should do. */ set_scheduler(); /* Then tell the (possibly new) scheduler that we have new options. */ if (the_scheduler->on_new_options) { the_scheduler->on_new_options(); } } /** * Whenever we get a new consensus, this function is called. */ void scheduler_notify_networkstatus_changed(const networkstatus_t *old_c, const networkstatus_t *new_c) { /* Maybe the consensus param made us change the scheduler. 
*/ set_scheduler(); /* Then tell the (possibly new) scheduler that we have a new consensus */ if (the_scheduler->on_new_consensus) { the_scheduler->on_new_consensus(old_c, new_c); } } /** * Free everything scheduling-related from main.c. Note this is only called * when Tor is shutting down, while scheduler_t->free_all() is called both when * Tor is shutting down and when we are switching schedulers. */ void scheduler_free_all(void) { log_debug(LD_SCHED, "Shutting down scheduler"); if (run_sched_ev) { if (event_del(run_sched_ev) < 0) { log_warn(LD_BUG, "Problem deleting run_sched_ev"); } tor_event_free(run_sched_ev); run_sched_ev = NULL; } if (channels_pending) { /* We don't have ownership of the objects in this list. */ smartlist_free(channels_pending); channels_pending = NULL; } if (the_scheduler && the_scheduler->free_all) { the_scheduler->free_all(); } the_scheduler = NULL; } /** Mark a channel as no longer ready to accept writes. */ MOCK_IMPL(void, scheduler_channel_doesnt_want_writes,(channel_t *chan)) { IF_BUG_ONCE(!chan) { return; } IF_BUG_ONCE(!channels_pending) { return; } /* If it's already in pending, we can put it in waiting_to_write */ if (chan->scheduler_state == SCHED_CHAN_PENDING) { /* * It's in channels_pending, so it shouldn't be in any of * the other lists. It can't write any more, so it goes to * channels_waiting_to_write. */ smartlist_pqueue_remove(channels_pending, scheduler_compare_channels, offsetof(channel_t, sched_heap_idx), chan); chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; log_debug(LD_SCHED, "Channel " U64_FORMAT " at %p went from pending " "to waiting_to_write", U64_PRINTF_ARG(chan->global_identifier), chan); } else { /* * It's not in pending, so it can't become waiting_to_write; it's * either not in any of the lists (nothing to do) or it's already in * waiting_for_cells (remove it, can't write any more). 
*/ if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { chan->scheduler_state = SCHED_CHAN_IDLE; log_debug(LD_SCHED, "Channel " U64_FORMAT " at %p left waiting_for_cells", U64_PRINTF_ARG(chan->global_identifier), chan); } } } /** Mark a channel as having waiting cells. */ MOCK_IMPL(void, scheduler_channel_has_waiting_cells,(channel_t *chan)) { IF_BUG_ONCE(!chan) { return; } IF_BUG_ONCE(!channels_pending) { return; } /* First, check if it's also writeable */ if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { /* * It's in channels_waiting_for_cells, so it shouldn't be in any of * the other lists. It has waiting cells now, so it goes to * channels_pending. */ chan->scheduler_state = SCHED_CHAN_PENDING; smartlist_pqueue_add(channels_pending, scheduler_compare_channels, offsetof(channel_t, sched_heap_idx), chan); log_debug(LD_SCHED, "Channel " U64_FORMAT " at %p went from waiting_for_cells " "to pending", U64_PRINTF_ARG(chan->global_identifier), chan); /* If we made a channel pending, we potentially have scheduling work to * do. */ the_scheduler->schedule(); } else { /* * It's not in waiting_for_cells, so it can't become pending; it's * either not in any of the lists (we add it to waiting_to_write) * or it's already in waiting_to_write or pending (we do nothing) */ if (!(chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE || chan->scheduler_state == SCHED_CHAN_PENDING)) { chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE; log_debug(LD_SCHED, "Channel " U64_FORMAT " at %p entered waiting_to_write", U64_PRINTF_ARG(chan->global_identifier), chan); } } } /** Add the scheduler event to the set of pending events with next_run being * the longest time libevent should wait before triggering the event. */ void scheduler_ev_add(const struct timeval *next_run) { tor_assert(run_sched_ev); tor_assert(next_run); if (BUG(event_add(run_sched_ev, next_run) < 0)) { log_warn(LD_SCHED, "Adding to libevent failed. 
Next run time was set to: " "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec); return; } } /** Make the scheduler event active with the given flags. */ void scheduler_ev_active(int flags) { tor_assert(run_sched_ev); event_active(run_sched_ev, flags, 1); } /* * Initialize everything scheduling-related from config.c. Note this is only * called when Tor is starting up, while scheduler_t->init() is called both * when Tor is starting up and when we are switching schedulers. */ void scheduler_init(void) { log_debug(LD_SCHED, "Initting scheduler"); // Two '!' because we really do want to check if the pointer is non-NULL IF_BUG_ONCE(!!run_sched_ev) { log_warn(LD_SCHED, "We should not already have a libevent scheduler event." "I'll clean the old one up, but this is odd."); tor_event_free(run_sched_ev); run_sched_ev = NULL; } run_sched_ev = tor_event_new(tor_libevent_get_base(), -1, 0, scheduler_evt_callback, NULL); channels_pending = smartlist_new(); set_scheduler(); } /* * If a channel is going away, this is how the scheduling system is informed * so it can do any freeing necessary. This ultimately calls * scheduler_t->on_channel_free() so the current scheduler can release any * state specific to this channel. 
*/
MOCK_IMPL(void,
scheduler_release_channel,(channel_t *chan))
{
  IF_BUG_ONCE(!chan) {
    return;
  }
  IF_BUG_ONCE(!channels_pending) {
    return;
  }

  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
    /* A pending channel is expected to be queued; check before removing so
     * an inconsistent state gets a warning instead of a bad removal. */
    if (smartlist_pos(channels_pending, chan) == -1) {
      log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
                         "but it wasn't in channels_pending",
               chan->global_identifier);
    } else {
      smartlist_pqueue_remove(channels_pending,
                              scheduler_compare_channels,
                              offsetof(channel_t, sched_heap_idx),
                              chan);
    }
  }

  /* Let the active scheduler drop whatever it keeps for this channel. */
  if (the_scheduler->on_channel_free) {
    the_scheduler->on_channel_free(chan);
  }
  chan->scheduler_state = SCHED_CHAN_IDLE;
}

/** Mark a channel as ready to accept writes */
void
scheduler_channel_wants_writes(channel_t *chan)
{
  IF_BUG_ONCE(!chan) {
    return;
  }
  IF_BUG_ONCE(!channels_pending) {
    return;
  }

  /* If it's already in waiting_to_write, we can put it in pending */
  if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
    /*
     * It can write now, so it goes to channels_pending.
     */
    log_debug(LD_SCHED, "chan=%" PRIu64 " became pending",
              chan->global_identifier);
    smartlist_pqueue_add(channels_pending,
                         scheduler_compare_channels,
                         offsetof(channel_t, sched_heap_idx),
                         chan);
    chan->scheduler_state = SCHED_CHAN_PENDING;
    log_debug(LD_SCHED,
              "Channel " U64_FORMAT " at %p went from waiting_to_write "
              "to pending",
              U64_PRINTF_ARG(chan->global_identifier), chan);
    /* We just made a channel pending, we have scheduling work to do. */
    the_scheduler->schedule();
  } else {
    /*
     * It's not in SCHED_CHAN_WAITING_TO_WRITE, so it can't become pending;
     * it's either idle and goes to WAITING_FOR_CELLS, or it's a no-op.
     */
    if (!(chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS ||
          chan->scheduler_state == SCHED_CHAN_PENDING)) {
      chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS;
      log_debug(LD_SCHED,
                "Channel " U64_FORMAT " at %p entered waiting_for_cells",
                U64_PRINTF_ARG(chan->global_identifier), chan);
    }
  }
}

#ifdef TOR_UNIT_TESTS

/**
 * Notify scheduler that a channel's queue position may have changed. Only
 * built for unit tests. A channel that is not pending is left untouched.
 */
void
scheduler_touch_channel(channel_t *chan)
{
  IF_BUG_ONCE(!chan) {
    return;
  }
  /* Same guard as the other entry points: without a pending list there is
   * nothing to reorder, and the queue helpers must not be given NULL. */
  IF_BUG_ONCE(!channels_pending) {
    return;
  }

  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
    /* Remove and re-add it */
    smartlist_pqueue_remove(channels_pending,
                            scheduler_compare_channels,
                            offsetof(channel_t, sched_heap_idx),
                            chan);
    smartlist_pqueue_add(channels_pending,
                         scheduler_compare_channels,
                         offsetof(channel_t, sched_heap_idx),
                         chan);
  }
  /* else no-op, since it isn't in the queue */
}

#endif /* defined(TOR_UNIT_TESTS) */