scheduler.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /* * Copyright (c) 2013-2017, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "or.h"
  4. #include "config.h"
  5. #include "compat_libevent.h"
  6. #define SCHEDULER_PRIVATE_
  7. #include "scheduler.h"
  8. #include <event2/event.h>
  9. /**
  10. * \file scheduler.c
  11. * \brief Channel scheduling system: decides which channels should send and
  12. * receive when.
  13. *
  14. * This module is the global/common parts of the scheduling system. This system
  15. * is what decides what channels get to send cells on their circuits and when.
  16. *
  17. * Terms:
  18. * - "Scheduling system": the collection of scheduler*.{h,c} files and their
  19. * aggregate behavior.
  20. * - "Scheduler implementation": a scheduler_t. The scheduling system has one
  21. * active scheduling implementation at a time.
  22. *
  23. * In this file you will find state that any scheduler implementation can have
  24. * access to as well as the functions the rest of Tor uses to interact with the
  25. * scheduling system.
  26. *
  27. * The earliest versions of Tor approximated a kind of round-robin system
  28. * among active connections, but only approximated it. It would only consider
  29. * one connection (roughly equal to a channel in today's terms) at a time, and
  30. * thus could only prioritize circuits against others on the same connection.
  31. *
  32. * Then in response to the KIST paper[0], Tor implemented a global
  33. * circuit scheduler. It was supposed to prioritize circuits across many
  34. * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
  35. *
  36. * [0]: http://www.robgjansen.com/publications/kist-sec2014.pdf
  37. *
  38. * Then we actually got around to implementing KIST for real. We decided to
  39. * modularize the scheduler so new ones can be implemented. You can find KIST
  40. * in scheduler_kist.c.
  41. *
  42. * Channels have one of four scheduling states based on whether or not they
  43. * have cells to send and whether or not they are able to send.
  44. *
  45. * <ol>
  46. * <li>
  47. * Not open for writes, no cells to send.
  48. * <ul><li> Not much to do here, and the channel will have scheduler_state
  49. * == SCHED_CHAN_IDLE
  50. * <li> Transitions from:
  51. * <ul>
  52. * <li>Open for writes/has cells by simultaneously draining all circuit
  53. * queues and filling the output buffer.
  54. * </ul>
  55. * <li> Transitions to:
  56. * <ul>
  57. * <li> Not open for writes/has cells by arrival of cells on an attached
  58. * circuit (this would be driven from append_cell_to_circuit_queue())
  59. * <li> Open for writes/no cells by a channel type specific path;
  60. * driven from connection_or_flushed_some() for channel_tls_t.
  61. * </ul>
  62. * </ul>
  63. *
  64. * <li> Open for writes, no cells to send
  65. * <ul>
  66. * <li>Not much here either; this will be the state an idle but open
  67. * channel can be expected to settle in. It will have scheduler_state
  68. * == SCHED_CHAN_WAITING_FOR_CELLS
  69. * <li> Transitions from:
  70. * <ul>
  71. * <li>Not open for writes/no cells by flushing some of the output
  72. * buffer.
  73. * <li>Open for writes/has cells by the scheduler moving cells from
  74. * circuit queues to channel output queue, but not having enough
  75. * to fill the output queue.
  76. * </ul>
  77. * <li> Transitions to:
  78. * <ul>
  79. * <li>Open for writes/has cells by arrival of new cells on an attached
  80. * circuit, in append_cell_to_circuit_queue()
  81. * </ul>
  82. * </ul>
  83. *
  84. * <li>Not open for writes, cells to send
  85. * <ul>
  86. * <li>This is the state of a busy circuit limited by output bandwidth;
  87. * cells have piled up in the circuit queues waiting to be relayed.
  88. * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
  89. * <li> Transitions from:
  90. * <ul>
  91. * <li>Not open for writes/no cells by arrival of cells on an attached
  92. * circuit
  93. * <li> Open for writes/has cells by filling an output buffer without
  94. * draining all cells from attached circuits
  95. * </ul>
  96. * <li> Transitions to:
  97. * <ul>
  98. * <li>Open for writes/has cells by draining some of the output buffer
  99. * via the connection_or_flushed_some() path (for channel_tls_t).
  100. * </ul>
  101. * </ul>
  102. *
  103. * <li>Open for writes, cells to send
  104. * <ul>
  105. * <li>This connection is ready to relay some cells and waiting for
  106. * the scheduler to choose it. The channel will have scheduler_state ==
  107. * SCHED_CHAN_PENDING.
  108. * <li>Transitions from:
  109. * <ul>
  110. * <li> Not open for writes/has cells by the connection_or_flushed_some()
  111. * path
  112. * <li> Open for writes/no cells by the append_cell_to_circuit_queue()
  113. * path
  114. * </ul>
  115. * <li> Transitions to:
  116. * <ul>
  117. * <li>Not open for writes/no cells by draining all circuit queues and
  118. * simultaneously filling the output buffer.
  119. * <li>Not open for writes/has cells by writing enough cells to fill the
  120. * output buffer
  121. * <li>Open for writes/no cells by draining all attached circuit queues
  122. * without also filling the output buffer
  123. * </ul>
  124. * </ul>
  125. * </ol>
  126. *
  127. * Other event-driven parts of the code move channels between these scheduling
  128. * states by calling scheduler functions. The scheduling system builds up a
  129. * list of channels in the SCHED_CHAN_PENDING state that the scheduler
  130. * implementation should then use when it runs. Scheduling implementations need
  131. * to properly update channel states during their scheduler_t->run() function
  132. * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
  133. * to any other state.
  134. *
  135. * The remainder of this file is a small amount of state that any scheduler
  136. * implementation should have access to, and the functions the rest of Tor uses
  137. * to interact with the scheduling system.
  138. */
  139. /*****************************************************************************
  140. * Scheduling system state
  141. *
  142. * State that can be accessed from any scheduler implementation (but not
  143. * outside the scheduling system)
  144. *****************************************************************************/
  145. STATIC scheduler_t *the_scheduler;
  146. /*
  147. * We keep a list of channels that are pending - i.e, have cells to write
  148. * and can accept them to send. The enum scheduler_state in channel_t
  149. * is reserved for our use.
  150. *
  151. * Priority queue of channels that can write and have cells (pending work)
  152. */
  153. STATIC smartlist_t *channels_pending = NULL;
  154. /*
  155. * This event runs the scheduler from its callback, and is manually
  156. * activated whenever a channel enters open for writes/cells to send.
  157. */
  158. STATIC struct event *run_sched_ev = NULL;
  159. /*****************************************************************************
  160. * Scheduling system static function definitions
  161. *
  162. * Functions that can only be accessed from this file.
  163. *****************************************************************************/
  164. /*
  165. * Scheduler event callback; this should get triggered once per event loop
  166. * if any scheduling work was created during the event loop.
  167. */
  168. static void
  169. scheduler_evt_callback(evutil_socket_t fd, short events, void *arg)
  170. {
  171. (void) fd;
  172. (void) events;
  173. (void) arg;
  174. log_debug(LD_SCHED, "Scheduler event callback called");
  175. /* Run the scheduler. This is a mandatory function. */
  176. /* We might as well assert on this. If this function doesn't exist, no cells
  177. * are getting scheduled. Things are very broken. scheduler_t says the run()
  178. * function is mandatory. */
  179. tor_assert(the_scheduler->run);
  180. the_scheduler->run();
  181. /* Schedule itself back in if it has more work. */
  182. /* Again, might as well assert on this mandatory scheduler_t function. If it
  183. * doesn't exist, there's no way to tell libevent to run the scheduler again
  184. * in the future. */
  185. tor_assert(the_scheduler->schedule);
  186. the_scheduler->schedule();
  187. }
  188. /*****************************************************************************
  189. * Scheduling system private function definitions
  190. *
  191. * Functions that can only be accessed from scheduler*.c
  192. *****************************************************************************/
  193. /* Return the pending channel list. */
  194. smartlist_t *
  195. get_channels_pending(void)
  196. {
  197. return channels_pending;
  198. }
  199. /* Return our libevent scheduler event. */
  200. struct event *
  201. get_run_sched_ev(void)
  202. {
  203. return run_sched_ev;
  204. }
  205. /* Comparison function to use when sorting pending channels */
  206. MOCK_IMPL(int,
  207. scheduler_compare_channels, (const void *c1_v, const void *c2_v))
  208. {
  209. const channel_t *c1 = NULL, *c2 = NULL;
  210. /* These are a workaround for -Wbad-function-cast throwing a fit */
  211. const circuitmux_policy_t *p1, *p2;
  212. uintptr_t p1_i, p2_i;
  213. c1 = (const channel_t *)(c1_v);
  214. c2 = (const channel_t *)(c2_v);
  215. IF_BUG_ONCE(!c1 || !c2) {
  216. if (c1 && !c2) {
  217. return -1;
  218. } else if (c2 && !c1) {
  219. return 1;
  220. } else {
  221. return -1;
  222. }
  223. }
  224. if (c1 != c2) {
  225. if (circuitmux_get_policy(c1->cmux) ==
  226. circuitmux_get_policy(c2->cmux)) {
  227. /* Same cmux policy, so use the mux comparison */
  228. return circuitmux_compare_muxes(c1->cmux, c2->cmux);
  229. } else {
  230. /*
  231. * Different policies; not important to get this edge case perfect
  232. * because the current code never actually gives different channels
  233. * different cmux policies anyway. Just use this arbitrary but
  234. * definite choice.
  235. */
  236. p1 = circuitmux_get_policy(c1->cmux);
  237. p2 = circuitmux_get_policy(c2->cmux);
  238. p1_i = (uintptr_t)p1;
  239. p2_i = (uintptr_t)p2;
  240. return (p1_i < p2_i) ? -1 : 1;
  241. }
  242. } else {
  243. /* c1 == c2, so always equal */
  244. return 0;
  245. }
  246. }
  247. /*****************************************************************************
  248. * Scheduling system global functions
  249. *
  250. * Functions that can be accessed from anywhere in Tor.
  251. *****************************************************************************/
  252. /* Using the global options, select the scheduler we should be using. */
  253. static void
  254. select_scheduler(void)
  255. {
  256. const char *chosen_sched_type = NULL;
  257. /* This list is ordered that is first entry has the first priority. Thus, as
  258. * soon as we find a scheduler type that we can use, we use it and stop. */
  259. SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) {
  260. switch (*type) {
  261. case SCHEDULER_VANILLA:
  262. the_scheduler = get_vanilla_scheduler();
  263. chosen_sched_type = "Vanilla";
  264. goto end;
  265. case SCHEDULER_KIST:
  266. if (!scheduler_can_use_kist()) {
  267. log_warn(LD_SCHED, "Scheduler KIST can't be used. Consider removing "
  268. "it from Schedulers or if you have a tor built "
  269. "with KIST support, you should make sure "
  270. "KISTSchedRunInterval is a non zero value");
  271. continue;
  272. }
  273. the_scheduler = get_kist_scheduler();
  274. chosen_sched_type = "KIST";
  275. scheduler_kist_set_full_mode();
  276. goto end;
  277. case SCHEDULER_KIST_LITE:
  278. chosen_sched_type = "KISTLite";
  279. the_scheduler = get_kist_scheduler();
  280. scheduler_kist_set_lite_mode();
  281. goto end;
  282. default:
  283. /* Our option validation should have caught this. */
  284. tor_assert_unreached();
  285. }
  286. } SMARTLIST_FOREACH_END(type);
  287. end:
  288. log_notice(LD_CONFIG, "Scheduler type %s has been enabled.",
  289. chosen_sched_type);
  290. }
  291. /*
  292. * Little helper function called from a few different places. It changes the
  293. * scheduler implementation, if necessary. And if it did, it then tells the
  294. * old one to free its state and the new one to initialize.
  295. */
  296. static void
  297. set_scheduler(void)
  298. {
  299. scheduler_t *old_scheduler = the_scheduler;
  300. /* From the options, select the scheduler type to set. */
  301. select_scheduler();
  302. if (old_scheduler != the_scheduler) {
  303. /* Allow the old scheduler to clean up, if needed. */
  304. if (old_scheduler && old_scheduler->free_all) {
  305. old_scheduler->free_all();
  306. }
  307. /* We don't clean up the old scheduler_t. We keep any type of scheduler
  308. * we've allocated so we can do an easy switch back. */
  309. /* Initialize the new scheduler. */
  310. if (the_scheduler->init) {
  311. the_scheduler->init();
  312. }
  313. }
  314. }
  315. /*
  316. * This is how the scheduling system is notified of Tor's configuration
  317. * changing. For example: a SIGHUP was issued.
  318. */
  319. void
  320. scheduler_conf_changed(void)
  321. {
  322. /* Let the scheduler decide what it should do. */
  323. set_scheduler();
  324. /* Then tell the (possibly new) scheduler that we have new options. */
  325. if (the_scheduler->on_new_options) {
  326. the_scheduler->on_new_options();
  327. }
  328. }
  329. /*
  330. * Whenever we get a new consensus, this function is called.
  331. */
  332. void
  333. scheduler_notify_networkstatus_changed(const networkstatus_t *old_c,
  334. const networkstatus_t *new_c)
  335. {
  336. /* Then tell the (possibly new) scheduler that we have a new consensus */
  337. if (the_scheduler->on_new_consensus) {
  338. the_scheduler->on_new_consensus(old_c, new_c);
  339. }
  340. /* Maybe the consensus param made us change the scheduler. */
  341. set_scheduler();
  342. }
  343. /*
  344. * Free everything scheduling-related from main.c. Note this is only called
  345. * when Tor is shutting down, while scheduler_t->free_all() is called both when
  346. * Tor is shutting down and when we are switching schedulers.
  347. */
  348. void
  349. scheduler_free_all(void)
  350. {
  351. log_debug(LD_SCHED, "Shutting down scheduler");
  352. if (run_sched_ev) {
  353. if (event_del(run_sched_ev) < 0) {
  354. log_warn(LD_BUG, "Problem deleting run_sched_ev");
  355. }
  356. tor_event_free(run_sched_ev);
  357. run_sched_ev = NULL;
  358. }
  359. if (channels_pending) {
  360. /* We don't have ownership of the object in this list. */
  361. smartlist_free(channels_pending);
  362. channels_pending = NULL;
  363. }
  364. if (the_scheduler && the_scheduler->free_all) {
  365. the_scheduler->free_all();
  366. }
  367. tor_free(the_scheduler);
  368. the_scheduler = NULL;
  369. }
  370. /** Mark a channel as no longer ready to accept writes */
  371. MOCK_IMPL(void,
  372. scheduler_channel_doesnt_want_writes,(channel_t *chan))
  373. {
  374. IF_BUG_ONCE(!chan) {
  375. return;
  376. }
  377. IF_BUG_ONCE(!channels_pending) {
  378. return;
  379. }
  380. /* If it's already in pending, we can put it in waiting_to_write */
  381. if (chan->scheduler_state == SCHED_CHAN_PENDING) {
  382. /*
  383. * It's in channels_pending, so it shouldn't be in any of
  384. * the other lists. It can't write any more, so it goes to
  385. * channels_waiting_to_write.
  386. */
  387. smartlist_pqueue_remove(channels_pending,
  388. scheduler_compare_channels,
  389. offsetof(channel_t, sched_heap_idx),
  390. chan);
  391. chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
  392. log_debug(LD_SCHED,
  393. "Channel " U64_FORMAT " at %p went from pending "
  394. "to waiting_to_write",
  395. U64_PRINTF_ARG(chan->global_identifier), chan);
  396. } else {
  397. /*
  398. * It's not in pending, so it can't become waiting_to_write; it's
  399. * either not in any of the lists (nothing to do) or it's already in
  400. * waiting_for_cells (remove it, can't write any more).
  401. */
  402. if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
  403. chan->scheduler_state = SCHED_CHAN_IDLE;
  404. log_debug(LD_SCHED,
  405. "Channel " U64_FORMAT " at %p left waiting_for_cells",
  406. U64_PRINTF_ARG(chan->global_identifier), chan);
  407. }
  408. }
  409. }
  410. /** Mark a channel as having waiting cells */
  411. MOCK_IMPL(void,
  412. scheduler_channel_has_waiting_cells,(channel_t *chan))
  413. {
  414. IF_BUG_ONCE(!chan) {
  415. return;
  416. }
  417. IF_BUG_ONCE(!channels_pending) {
  418. return;
  419. }
  420. /* First, check if this one also writeable */
  421. if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
  422. /*
  423. * It's in channels_waiting_for_cells, so it shouldn't be in any of
  424. * the other lists. It has waiting cells now, so it goes to
  425. * channels_pending.
  426. */
  427. chan->scheduler_state = SCHED_CHAN_PENDING;
  428. smartlist_pqueue_add(channels_pending,
  429. scheduler_compare_channels,
  430. offsetof(channel_t, sched_heap_idx),
  431. chan);
  432. log_debug(LD_SCHED,
  433. "Channel " U64_FORMAT " at %p went from waiting_for_cells "
  434. "to pending",
  435. U64_PRINTF_ARG(chan->global_identifier), chan);
  436. /* If we made a channel pending, we potentially have scheduling work to
  437. * do. */
  438. the_scheduler->schedule();
  439. } else {
  440. /*
  441. * It's not in waiting_for_cells, so it can't become pending; it's
  442. * either not in any of the lists (we add it to waiting_to_write)
  443. * or it's already in waiting_to_write or pending (we do nothing)
  444. */
  445. if (!(chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE ||
  446. chan->scheduler_state == SCHED_CHAN_PENDING)) {
  447. chan->scheduler_state = SCHED_CHAN_WAITING_TO_WRITE;
  448. log_debug(LD_SCHED,
  449. "Channel " U64_FORMAT " at %p entered waiting_to_write",
  450. U64_PRINTF_ARG(chan->global_identifier), chan);
  451. }
  452. }
  453. }
  454. /*
  455. * Initialize everything scheduling-related from config.c. Note this is only
  456. * called when Tor is starting up, while scheduler_t->init() is called both
  457. * when Tor is starting up and when we are switching schedulers.
  458. */
  459. void
  460. scheduler_init(void)
  461. {
  462. log_debug(LD_SCHED, "Initting scheduler");
  463. // Two '!' because we really do want to check if the pointer is non-NULL
  464. IF_BUG_ONCE(!!run_sched_ev) {
  465. log_warn(LD_SCHED, "We should not already have a libevent scheduler event."
  466. "I'll clean the old one up, but this is odd.");
  467. tor_event_free(run_sched_ev);
  468. run_sched_ev = NULL;
  469. }
  470. run_sched_ev = tor_event_new(tor_libevent_get_base(), -1,
  471. 0, scheduler_evt_callback, NULL);
  472. channels_pending = smartlist_new();
  473. set_scheduler();
  474. }
  475. /*
  476. * If a channel is going away, this is how the scheduling system is informed
  477. * so it can do any freeing necessary. This ultimately calls
  478. * scheduler_t->on_channel_free() so the current scheduler can release any
  479. * state specific to this channel.
  480. */
  481. MOCK_IMPL(void,
  482. scheduler_release_channel,(channel_t *chan))
  483. {
  484. IF_BUG_ONCE(!chan) {
  485. return;
  486. }
  487. IF_BUG_ONCE(!channels_pending) {
  488. return;
  489. }
  490. if (chan->scheduler_state == SCHED_CHAN_PENDING) {
  491. if (smartlist_pos(channels_pending, chan) == -1) {
  492. log_warn(LD_SCHED, "Scheduler asked to release channel %" PRIu64 " "
  493. "but it wasn't in channels_pending",
  494. chan->global_identifier);
  495. } else {
  496. smartlist_pqueue_remove(channels_pending,
  497. scheduler_compare_channels,
  498. offsetof(channel_t, sched_heap_idx),
  499. chan);
  500. }
  501. if (the_scheduler->on_channel_free) {
  502. the_scheduler->on_channel_free(chan);
  503. }
  504. }
  505. chan->scheduler_state = SCHED_CHAN_IDLE;
  506. }
  507. /** Mark a channel as ready to accept writes */
  508. void
  509. scheduler_channel_wants_writes(channel_t *chan)
  510. {
  511. IF_BUG_ONCE(!chan) {
  512. return;
  513. }
  514. IF_BUG_ONCE(!channels_pending) {
  515. return;
  516. }
  517. /* If it's already in waiting_to_write, we can put it in pending */
  518. if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
  519. /*
  520. * It can write now, so it goes to channels_pending.
  521. */
  522. log_debug(LD_SCHED, "chan=%" PRIu64 " became pending",
  523. chan->global_identifier);
  524. smartlist_pqueue_add(channels_pending,
  525. scheduler_compare_channels,
  526. offsetof(channel_t, sched_heap_idx),
  527. chan);
  528. chan->scheduler_state = SCHED_CHAN_PENDING;
  529. log_debug(LD_SCHED,
  530. "Channel " U64_FORMAT " at %p went from waiting_to_write "
  531. "to pending",
  532. U64_PRINTF_ARG(chan->global_identifier), chan);
  533. /* We just made a channel pending, we have scheduling work to do. */
  534. the_scheduler->schedule();
  535. } else {
  536. /*
  537. * It's not in SCHED_CHAN_WAITING_TO_WRITE, so it can't become pending;
  538. * it's either idle and goes to WAITING_FOR_CELLS, or it's a no-op.
  539. */
  540. if (!(chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS ||
  541. chan->scheduler_state == SCHED_CHAN_PENDING)) {
  542. chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS;
  543. log_debug(LD_SCHED,
  544. "Channel " U64_FORMAT " at %p entered waiting_for_cells",
  545. U64_PRINTF_ARG(chan->global_identifier), chan);
  546. }
  547. }
  548. }
  549. #ifdef TOR_UNIT_TESTS
  550. /*
  551. * Notify scheduler that a channel's queue position may have changed.
  552. */
  553. void
  554. scheduler_touch_channel(channel_t *chan)
  555. {
  556. IF_BUG_ONCE(!chan) {
  557. return;
  558. }
  559. if (chan->scheduler_state == SCHED_CHAN_PENDING) {
  560. /* Remove and re-add it */
  561. smartlist_pqueue_remove(channels_pending,
  562. scheduler_compare_channels,
  563. offsetof(channel_t, sched_heap_idx),
  564. chan);
  565. smartlist_pqueue_add(channels_pending,
  566. scheduler_compare_channels,
  567. offsetof(channel_t, sched_heap_idx),
  568. chan);
  569. }
  570. /* else no-op, since it isn't in the queue */
  571. }
  572. #endif /* TOR_UNIT_TESTS */