123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697 |
- /* Copyright (c) 2013-2017, The Tor Project, Inc. */
- /* See LICENSE for licensing information */
- #include "or.h"
- #include "config.h"
- #include "compat_libevent.h"
- #define SCHEDULER_PRIVATE_
- #define SCHEDULER_KIST_PRIVATE
- #include "scheduler.h"
- #include "main.h"
- #include <event2/event.h>
- /**
- * \file scheduler.c
- * \brief Channel scheduling system: decides which channels should send and
- * receive when.
- *
- * This module is the global/common parts of the scheduling system. This system
- * is what decides what channels get to send cells on their circuits and when.
- *
- * Terms:
- * - "Scheduling system": the collection of scheduler*.{h,c} files and their
- * aggregate behavior.
- * - "Scheduler implementation": a scheduler_t. The scheduling system has one
- * active scheduling implementation at a time.
- *
- * In this file you will find state that any scheduler implementation can have
- * access to as well as the functions the rest of Tor uses to interact with the
- * scheduling system.
- *
- * The earliest versions of Tor approximated a kind of round-robin system
- * among active connections, but only approximated it. It would only consider
- * one connection (roughly equal to a channel in today's terms) at a time, and
- * thus could only prioritize circuits against others on the same connection.
- *
- * Then in response to the KIST paper[0], Tor implemented a global
- * circuit scheduler. It was supposed to prioritize circuits across many
- * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
- *
- * [0]: http://www.robgjansen.com/publications/kist-sec2014.pdf
- *
- * Then we actually got around to implementing KIST for real. We decided to
- * modularize the scheduler so new ones can be implemented. You can find KIST
- * in scheduler_kist.c.
- *
- * Channels have one of four scheduling states based on whether or not they
- * have cells to send and whether or not they are able to send.
- *
- * <ol>
- * <li>
- * Not open for writes, no cells to send.
- * <ul><li> Not much to do here, and the channel will have scheduler_state
- * == SCHED_CHAN_IDLE
- * <li> Transitions from:
- * <ul>
- * <li>Open for writes/has cells by simultaneously draining all circuit
- * queues and filling the output buffer.
- * </ul>
- * <li> Transitions to:
- * <ul>
- * <li> Not open for writes/has cells by arrival of cells on an attached
- * circuit (this would be driven from append_cell_to_circuit_queue())
- * <li> Open for writes/no cells by a channel type specific path;
- * driven from connection_or_flushed_some() for channel_tls_t.
- * </ul>
- * </ul>
- *
- * <li> Open for writes, no cells to send
- * <ul>
- * <li>Not much here either; this will be the state an idle but open
- * channel can be expected to settle in. It will have scheduler_state
- * == SCHED_CHAN_WAITING_FOR_CELLS
- * <li> Transitions from:
- * <ul>
- * <li>Not open for writes/no cells by flushing some of the output
- * buffer.
- * <li>Open for writes/has cells by the scheduler moving cells from
- * circuit queues to channel output queue, but not having enough
- * to fill the output queue.
- * </ul>
- * <li> Transitions to:
- * <ul>
- * <li>Open for writes/has cells by arrival of new cells on an attached
- * circuit, in append_cell_to_circuit_queue()
- * </ul>
- * </ul>
- *
- * <li>Not open for writes, cells to send
- * <ul>
- * <li>This is the state of a busy circuit limited by output bandwidth;
- * cells have piled up in the circuit queues waiting to be relayed.
- * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
- * <li> Transitions from:
- * <ul>
- * <li>Not open for writes/no cells by arrival of cells on an attached
- * circuit
- * <li>Open for writes/has cells by filling an output buffer without
- * draining all cells from attached circuits
- * </ul>
- * <li> Transitions to:
- * <ul>
- * <li>Open for writes/has cells by draining some of the output buffer
- * via the connection_or_flushed_some() path (for channel_tls_t).
- * </ul>
- * </ul>
- *
- * <li>Open for writes, cells to send
- * <ul>
- * <li>This connection is ready to relay some cells and waiting for
- * the scheduler to choose it. The channel will have scheduler_state ==
- * SCHED_CHAN_PENDING.
- * <li>Transitions from:
- * <ul>
- * <li>Not open for writes/has cells by the connection_or_flushed_some()
- * path
- * <li>Open for writes/no cells by the append_cell_to_circuit_queue()
- * path
- * </ul>
- * <li> Transitions to:
- * <ul>
- * <li>Not open for writes/no cells by draining all circuit queues and
- * simultaneously filling the output buffer.
- * <li>Not open for writes/has cells by writing enough cells to fill the
- * output buffer
- * <li>Open for writes/no cells by draining all attached circuit queues
- * without also filling the output buffer
- * </ul>
- * </ul>
- * </ol>
- *
- * Other event-driven parts of the code move channels between these scheduling
- * states by calling scheduler functions. The scheduling system builds up a
- * list of channels in the SCHED_CHAN_PENDING state that the scheduler
- * implementation should then use when it runs. Scheduling implementations need
- * to properly update channel states during their scheduler_t->run() function
- * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
- * to any other state.
- *
- * The remainder of this file is a small amount of state that any scheduler
- * implementation should have access to, and the functions the rest of Tor uses
- * to interact with the scheduling system.
- */
- /*****************************************************************************
- * Scheduling system state
- *
- * State that can be accessed from any scheduler implementation (but not
- * outside the scheduling system)
- *****************************************************************************/
- /**
- * The scheduler implementation currently in use. It is assigned by
- * select_scheduler() and reset to NULL by scheduler_free_all().
- */
- STATIC const scheduler_t *the_scheduler;
- /**
- * We keep a list of channels that are pending - i.e., have cells to write
- * and can accept them to send. The enum scheduler_state in channel_t
- * is reserved for our use.
- *
- * Priority queue of channels that can write and have cells (pending work)
- *
- * The list is created in scheduler_init() and freed in scheduler_free_all();
- * it does not own the channels it references.
- */
- STATIC smartlist_t *channels_pending = NULL;
- /**
- * This event runs the scheduler from its callback, and is manually
- * activated whenever a channel enters open for writes/cells to send.
- *
- * It is created in scheduler_init() with scheduler_evt_callback() as its
- * callback and released in scheduler_free_all().
- */
- STATIC struct event *run_sched_ev = NULL;
- /*****************************************************************************
- * Scheduling system static function definitions
- *
- * Functions that can only be accessed from this file.
- *****************************************************************************/
- /** Return a human readable string for the given scheduler type. */
- static const char *
- get_scheduler_type_string(scheduler_types_t type)
- {
- switch (type) {
- case SCHEDULER_VANILLA:
- return "Vanilla";
- case SCHEDULER_KIST:
- return "KIST";
- case SCHEDULER_KIST_LITE:
- return "KISTLite";
- case SCHEDULER_NONE:
- /* fallthrough */
- default:
- tor_assert_unreached();
- return "(N/A)";
- }
- }
- /**
- * Scheduler event callback; this should get triggered once per event loop
- * if any scheduling work was created during the event loop.
- */
- static void
- scheduler_evt_callback(evutil_socket_t fd, short events, void *arg)
- {
- (void) fd;
- (void) events;
- (void) arg;
- log_debug(LD_SCHED, "Scheduler event callback called");
- /* Run the scheduler. This is a mandatory function. */
- /* We might as well assert on this. If this function doesn't exist, no cells
- * are getting scheduled. Things are very broken. scheduler_t says the run()
- * function is mandatory. */
- tor_assert(the_scheduler->run);
- the_scheduler->run();
- /* Schedule itself back in if it has more work. */
- /* Again, might as well assert on this mandatory scheduler_t function. If it
- * doesn't exist, there's no way to tell libevent to run the scheduler again
- * in the future. */
- tor_assert(the_scheduler->schedule);
- the_scheduler->schedule();
- }
- /** Using the global options, select the scheduler we should be using. */
- static void
- select_scheduler(void)
- {
- scheduler_t *new_scheduler = NULL;
- #ifdef TOR_UNIT_TESTS
- /* This is hella annoying to set in the options for every test that passes
- * through the scheduler and there are many so if we don't explicitly have
- * a list of types set, just put the vanilla one. */
- if (get_options()->SchedulerTypes_ == NULL) {
- the_scheduler = get_vanilla_scheduler();
- return;
- }
- #endif /* defined(TOR_UNIT_TESTS) */
- /* This list is ordered that is first entry has the first priority. Thus, as
- * soon as we find a scheduler type that we can use, we use it and stop. */
- SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) {
- switch (*type) {
- case SCHEDULER_VANILLA:
- new_scheduler = get_vanilla_scheduler();
- goto end;
- case SCHEDULER_KIST:
- if (!scheduler_can_use_kist()) {
- #ifdef HAVE_KIST_SUPPORT
- log_notice(LD_SCHED, "Scheduler type KIST has been disabled by "
- "the consensus or no kernel support.");
- #else /* !(defined(HAVE_KIST_SUPPORT)) */
- log_info(LD_SCHED, "Scheduler type KIST not built in");
- #endif /* defined(HAVE_KIST_SUPPORT) */
- continue;
- }
- new_scheduler = get_kist_scheduler();
- scheduler_kist_set_full_mode();
- goto end;
- case SCHEDULER_KIST_LITE:
- new_scheduler = get_kist_scheduler();
- scheduler_kist_set_lite_mode();
- goto end;
- case SCHEDULER_NONE:
- /* fallthrough */
- default:
- /* Our option validation should have caught this. */
- tor_assert_unreached();
- }
- } SMARTLIST_FOREACH_END(type);
- end:
- if (new_scheduler == NULL) {
- log_err(LD_SCHED, "Tor was unable to select a scheduler type. Please "
- "make sure Schedulers is correctly configured with "
- "what Tor does support.");
- /* We weren't able to choose a scheduler which means that none of the ones
- * set in Schedulers are supported or usable. We will respect the user
- * wishes of using what it has been configured and don't do a sneaky
- * fallback. Because this can be changed at runtime, we have to stop tor
- * right now. */
- exit(1); // XXXX bad exit
- }
- /* Set the chosen scheduler. */
- the_scheduler = new_scheduler;
- }
- /**
- * Helper function called from a few different places. It changes the
- * scheduler implementation, if necessary. And if it did, it then tells the
- * old one to free its state and the new one to initialize.
- */
- static void
- set_scheduler(void)
- {
- const scheduler_t *old_scheduler = the_scheduler;
- scheduler_types_t old_scheduler_type = SCHEDULER_NONE;
- /* We keep track of the type in order to log only if the type switched. We
- * can't just use the scheduler pointers because KIST and KISTLite share the
- * same object. */
- if (the_scheduler) {
- old_scheduler_type = the_scheduler->type;
- }
- /* From the options, select the scheduler type to set. */
- select_scheduler();
- tor_assert(the_scheduler);
- /* We look at the pointer difference in case the old sched and new sched
- * share the same scheduler object, as is the case with KIST and KISTLite. */
- if (old_scheduler != the_scheduler) {
- /* Allow the old scheduler to clean up, if needed. */
- if (old_scheduler && old_scheduler->free_all) {
- old_scheduler->free_all();
- }
- /* Initialize the new scheduler. */
- if (the_scheduler->init) {
- the_scheduler->init();
- }
- }
- /* Finally we notice log if we switched schedulers. We use the type in case
- * two schedulers share a scheduler object. */
- if (old_scheduler_type != the_scheduler->type) {
- log_notice(LD_CONFIG, "Scheduler type %s has been enabled.",
- get_scheduler_type_string(the_scheduler->type));
- }
- }
- /*****************************************************************************
- * Scheduling system private function definitions
- *
- * Functions that can only be accessed from scheduler*.c
- *****************************************************************************/
- /** Return the pending channel list. */
- smartlist_t *
- get_channels_pending(void)
- {
- return channels_pending;
- }
- /** Comparison function to use when sorting pending channels. */
- MOCK_IMPL(int,
- scheduler_compare_channels, (const void *c1_v, const void *c2_v))
- {
- const channel_t *c1 = NULL, *c2 = NULL;
- /* These are a workaround for -Wbad-function-cast throwing a fit */
- const circuitmux_policy_t *p1, *p2;
- uintptr_t p1_i, p2_i;
- tor_assert(c1_v);
- tor_assert(c2_v);
- c1 = (const channel_t *)(c1_v);
- c2 = (const channel_t *)(c2_v);
- if (c1 != c2) {
- if (circuitmux_get_policy(c1->cmux) ==
- circuitmux_get_policy(c2->cmux)) {
- /* Same cmux policy, so use the mux comparison */
- return circuitmux_compare_muxes(c1->cmux, c2->cmux);
- } else {
- /*
- * Different policies; not important to get this edge case perfect
- * because the current code never actually gives different channels
- * different cmux policies anyway. Just use this arbitrary but
- * definite choice.
- */
- p1 = circuitmux_get_policy(c1->cmux);
- p2 = circuitmux_get_policy(c2->cmux);
- p1_i = (uintptr_t)p1;
- p2_i = (uintptr_t)p2;
- return (p1_i < p2_i) ? -1 : 1;
- }
- } else {
- /* c1 == c2, so always equal */
- return 0;
- }
- }
- /*****************************************************************************
- * Scheduling system global functions
- *
- * Functions that can be accessed from anywhere in Tor.
- *****************************************************************************/
- /**
- * This is how the scheduling system is notified of Tor's configuration
- * changing. For example: a SIGHUP was issued.
- */
- void
- scheduler_conf_changed(void)
- {
- /* Let the scheduler decide what it should do. */
- set_scheduler();
- /* Then tell the (possibly new) scheduler that we have new options. */
- if (the_scheduler->on_new_options) {
- the_scheduler->on_new_options();
- }
- }
- /**
- * Whenever we get a new consensus, this function is called.
- */
- void
- scheduler_notify_networkstatus_changed(const networkstatus_t *old_c,
- const networkstatus_t *new_c)
- {
- /* Maybe the consensus param made us change the scheduler. */
- set_scheduler();
- /* Then tell the (possibly new) scheduler that we have a new consensus */
- if (the_scheduler->on_new_consensus) {
- the_scheduler->on_new_consensus(old_c, new_c);
- }
- }
- /**
- * Free everything scheduling-related from main.c. Note this is only called
- * when Tor is shutting down, while scheduler_t->free_all() is called both when
- * Tor is shutting down and when we are switching schedulers.
- */
- void
- scheduler_free_all(void)
- {
- log_debug(LD_SCHED, "Shutting down scheduler");
- if (run_sched_ev) {
- if (event_del(run_sched_ev) < 0) {
- log_warn(LD_BUG, "Problem deleting run_sched_ev");
- }
- tor_event_free(run_sched_ev);
- run_sched_ev = NULL;
- }
- if (channels_pending) {
- /* We don't have ownership of the objects in this list. */
- smartlist_free(channels_pending);
- channels_pending = NULL;
- }
- if (the_scheduler && the_scheduler->free_all) {
- the_scheduler->free_all();
- }
- the_scheduler = NULL;
- }
- /** Mark a channel as no longer ready to accept writes. */
- MOCK_IMPL(void,
- scheduler_channel_doesnt_want_writes,(channel_t *chan))
- {
- IF_BUG_ONCE(!chan) {
- return;
- }
- IF_BUG_ONCE(!channels_pending) {
- return;
- }
- /* If it's already in pending, we can put it in waiting_to_write */
- if (chan->scheduler_state == SCHED_CHAN_PENDING) {
- /*
- * It's in channels_pending, so it shouldn't be in any of
- * the other lists. It can't write any more, so it goes to
- * channels_waiting_to_write.
- */
- smartlist_pqueue_remove(channels_pending,
- scheduler_compare_channels,
- offsetof(channel_t, sched_heap_idx),
- chan);
- chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p went from pending "
- "to waiting_to_write",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- } else {
- /*
- * It's not in pending, so it can't become waiting_to_write; it's
- * either not in any of the lists (nothing to do) or it's already in
- * waiting_for_cells (remove it, can't write any more).
- */
- if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
- chan->scheduler_state = SCHED_CHAN_IDLE;
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p left waiting_for_cells",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- }
- }
- }
- /** Mark a channel as having waiting cells. */
- MOCK_IMPL(void,
- scheduler_channel_has_waiting_cells,(channel_t *chan))
- {
- IF_BUG_ONCE(!chan) {
- return;
- }
- IF_BUG_ONCE(!channels_pending) {
- return;
- }
- /* First, check if it's also writeable */
- if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
- /*
- * It's in channels_waiting_for_cells, so it shouldn't be in any of
- * the other lists. It has waiting cells now, so it goes to
- * channels_pending.
- */
- chan->scheduler_state = SCHED_CHAN_PENDING;
- smartlist_pqueue_add(channels_pending,
- scheduler_compare_channels,
- offsetof(channel_t, sched_heap_idx),
- chan);
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p went from waiting_for_cells "
- "to pending",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- /* If we made a channel pending, we potentially have scheduling work to
- * do. */
- the_scheduler->schedule();
- } else {
- /*
- * It's not in waiting_for_cells, so it can't become pending; it's
- * either not in any of the lists (we add it to waiting_to_write)
- * or it's already in waiting_to_write or pending (we do nothing)
- */
- if (!(chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE ||
- chan->scheduler_state == SCHED_CHAN_PENDING)) {
- chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p entered waiting_to_write",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- }
- }
- }
- /** Add the scheduler event to the set of pending events with next_run being
- * the longest time libevent should wait before triggering the event. */
- void
- scheduler_ev_add(const struct timeval *next_run)
- {
- tor_assert(run_sched_ev);
- tor_assert(next_run);
- if (BUG(event_add(run_sched_ev, next_run) < 0)) {
- log_warn(LD_SCHED, "Adding to libevent failed. Next run time was set to: "
- "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec);
- return;
- }
- }
- /** Make the scheduler event active with the given flags. */
- void
- scheduler_ev_active(int flags)
- {
- tor_assert(run_sched_ev);
- event_active(run_sched_ev, flags, 1);
- }
- /*
- * Initialize everything scheduling-related from config.c. Note this is only
- * called when Tor is starting up, while scheduler_t->init() is called both
- * when Tor is starting up and when we are switching schedulers.
- */
- void
- scheduler_init(void)
- {
- log_debug(LD_SCHED, "Initting scheduler");
- // Two '!' because we really do want to check if the pointer is non-NULL
- IF_BUG_ONCE(!!run_sched_ev) {
- log_warn(LD_SCHED, "We should not already have a libevent scheduler event."
- "I'll clean the old one up, but this is odd.");
- tor_event_free(run_sched_ev);
- run_sched_ev = NULL;
- }
- run_sched_ev = tor_event_new(tor_libevent_get_base(), -1,
- 0, scheduler_evt_callback, NULL);
- channels_pending = smartlist_new();
- set_scheduler();
- }
- /*
- * If a channel is going away, this is how the scheduling system is informed
- * so it can do any freeing necessary. This ultimately calls
- * scheduler_t->on_channel_free() so the current scheduler can release any
- * state specific to this channel.
- */
- MOCK_IMPL(void,
- scheduler_release_channel,(channel_t *chan))
- {
- IF_BUG_ONCE(!chan) {
- return;
- }
- IF_BUG_ONCE(!channels_pending) {
- return;
- }
- if (chan->scheduler_state == SCHED_CHAN_PENDING) {
- if (smartlist_pos(channels_pending, chan) == -1) {
- log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
- "but it wasn't in channels_pending",
- chan->global_identifier);
- } else {
- smartlist_pqueue_remove(channels_pending,
- scheduler_compare_channels,
- offsetof(channel_t, sched_heap_idx),
- chan);
- }
- }
- if (the_scheduler->on_channel_free) {
- the_scheduler->on_channel_free(chan);
- }
- chan->scheduler_state = SCHED_CHAN_IDLE;
- }
- /** Mark a channel as ready to accept writes */
- void
- scheduler_channel_wants_writes(channel_t *chan)
- {
- IF_BUG_ONCE(!chan) {
- return;
- }
- IF_BUG_ONCE(!channels_pending) {
- return;
- }
- /* If it's already in waiting_to_write, we can put it in pending */
- if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
- /*
- * It can write now, so it goes to channels_pending.
- */
- log_debug(LD_SCHED, "chan=%" PRIu64 " became pending",
- chan->global_identifier);
- smartlist_pqueue_add(channels_pending,
- scheduler_compare_channels,
- offsetof(channel_t, sched_heap_idx),
- chan);
- chan->scheduler_state = SCHED_CHAN_PENDING;
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p went from waiting_to_write "
- "to pending",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- /* We just made a channel pending, we have scheduling work to do. */
- the_scheduler->schedule();
- } else {
- /*
- * It's not in SCHED_CHAN_WAITING_TO_WRITE, so it can't become pending;
- * it's either idle and goes to WAITING_FOR_CELLS, or it's a no-op.
- */
- if (!(chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS ||
- chan->scheduler_state == SCHED_CHAN_PENDING)) {
- chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS;
- log_debug(LD_SCHED,
- "Channel " U64_FORMAT " at %p entered waiting_for_cells",
- U64_PRINTF_ARG(chan->global_identifier), chan);
- }
- }
- }
#ifdef TOR_UNIT_TESTS
/**
 * Tell the scheduler that the position of <b>chan</b> in the pending queue
 * may have changed. A pending channel is removed from channels_pending and
 * inserted again; a channel in any other state is not in the queue and is
 * left alone. A NULL channel is flagged as a bug and ignored.
 *
 * Only built when TOR_UNIT_TESTS is defined.
 */
void
scheduler_touch_channel(channel_t *chan)
{
  IF_BUG_ONCE(!chan) {
    return;
  }

  /* Channels outside the pending queue have no position to refresh. */
  if (chan->scheduler_state != SCHED_CHAN_PENDING) {
    return;
  }

  /* Take it out and put it back in so the queue re-evaluates its place. */
  smartlist_pqueue_remove(channels_pending,
                          scheduler_compare_channels,
                          offsetof(channel_t, sched_heap_idx),
                          chan);
  smartlist_pqueue_add(channels_pending,
                       scheduler_compare_channels,
                       offsetof(channel_t, sched_heap_idx),
                       chan);
}
#endif /* defined(TOR_UNIT_TESTS) */
|