circuitstats.c 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569
  1. /* Copyright (c) 2001 Matej Pfajfar.
  2. * Copyright (c) 2001-2004, Roger Dingledine.
  3. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  4. * Copyright (c) 2007-2012, The Tor Project, Inc. */
  5. /* See LICENSE for licensing information */
  6. #define CIRCUITSTATS_PRIVATE
  7. #include "or.h"
  8. #include "circuitbuild.h"
  9. #include "circuitstats.h"
  10. #include "config.h"
  11. #include "confparse.h"
  12. #include "control.h"
  13. #include "networkstatus.h"
  14. #include "statefile.h"
  15. #undef log
  16. #include <math.h>
  17. #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
  18. /** Global list of circuit build times */
  19. // XXXX: Add this as a member for entry_guard_t instead of global?
  20. // Then we could do per-guard statistics, as guards are likely to
  21. // vary in their own latency. The downside of this is that guards
  22. // can change frequently, so we'd be building a lot more circuits
  23. // most likely.
  24. /* XXXX024 Make this static; add accessor functions. */
  25. circuit_build_times_t circ_times;
  26. /** If set, we're running the unit tests: we should avoid clobbering
  27. * our state file or accessing get_options() or get_or_state() */
  28. static int unit_tests = 0;
  29. /**
  30. * This function decides if CBT learning should be disabled. It returns
  31. * true if one or more of the following four conditions are met:
  32. *
  33. * 1. If the cbtdisabled consensus parameter is set.
  34. * 2. If the torrc option LearnCircuitBuildTimeout is false.
  35. * 3. If we are a directory authority
  36. * 4. If we fail to write circuit build time history to our state file.
  37. */
  38. int
  39. circuit_build_times_disabled(void)
  40. {
  41. if (unit_tests) {
  42. return 0;
  43. } else {
  44. int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled",
  45. 0, 0, 1);
  46. int config_disabled = !get_options()->LearnCircuitBuildTimeout;
  47. int dirauth_disabled = get_options()->AuthoritativeDir;
  48. int state_disabled = did_last_state_file_write_fail() ? 1 : 0;
  49. if (consensus_disabled || config_disabled || dirauth_disabled ||
  50. state_disabled) {
  51. log_debug(LD_CIRC,
  52. "CircuitBuildTime learning is disabled. "
  53. "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
  54. consensus_disabled, config_disabled, dirauth_disabled,
  55. state_disabled);
  56. return 1;
  57. } else {
  58. log_debug(LD_CIRC,
  59. "CircuitBuildTime learning is not disabled. "
  60. "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
  61. consensus_disabled, config_disabled, dirauth_disabled,
  62. state_disabled);
  63. return 0;
  64. }
  65. }
  66. }
  67. /**
  68. * Retrieve and bounds-check the cbtmaxtimeouts consensus paramter.
  69. *
  70. * Effect: When this many timeouts happen in the last 'cbtrecentcount'
  71. * circuit attempts, the client should discard all of its history and
  72. * begin learning a fresh timeout value.
  73. */
  74. static int32_t
  75. circuit_build_times_max_timeouts(void)
  76. {
  77. int32_t cbt_maxtimeouts;
  78. cbt_maxtimeouts = networkstatus_get_param(NULL, "cbtmaxtimeouts",
  79. CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT,
  80. CBT_MIN_MAX_RECENT_TIMEOUT_COUNT,
  81. CBT_MAX_MAX_RECENT_TIMEOUT_COUNT);
  82. if (!(get_options()->LearnCircuitBuildTimeout)) {
  83. log_debug(LD_BUG,
  84. "circuit_build_times_max_timeouts() called, cbtmaxtimeouts is"
  85. " %d",
  86. cbt_maxtimeouts);
  87. }
  88. return cbt_maxtimeouts;
  89. }
  90. /**
  91. * Retrieve and bounds-check the cbtnummodes consensus paramter.
  92. *
  93. * Effect: This value governs how many modes to use in the weighted
  94. * average calculation of Pareto parameter Xm. A value of 3 introduces
  95. * some bias (2-5% of CDF) under ideal conditions, but allows for better
  96. * performance in the event that a client chooses guard nodes of radically
  97. * different performance characteristics.
  98. */
  99. static int32_t
  100. circuit_build_times_default_num_xm_modes(void)
  101. {
  102. int32_t num = networkstatus_get_param(NULL, "cbtnummodes",
  103. CBT_DEFAULT_NUM_XM_MODES,
  104. CBT_MIN_NUM_XM_MODES,
  105. CBT_MAX_NUM_XM_MODES);
  106. if (!(get_options()->LearnCircuitBuildTimeout)) {
  107. log_debug(LD_BUG,
  108. "circuit_build_times_default_num_xm_modes() called, cbtnummodes"
  109. " is %d",
  110. num);
  111. }
  112. return num;
  113. }
  114. /**
  115. * Retrieve and bounds-check the cbtmincircs consensus paramter.
  116. *
  117. * Effect: This is the minimum number of circuits to build before
  118. * computing a timeout.
  119. */
  120. static int32_t
  121. circuit_build_times_min_circs_to_observe(void)
  122. {
  123. int32_t num = networkstatus_get_param(NULL, "cbtmincircs",
  124. CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE,
  125. CBT_MIN_MIN_CIRCUITS_TO_OBSERVE,
  126. CBT_MAX_MIN_CIRCUITS_TO_OBSERVE);
  127. if (!(get_options()->LearnCircuitBuildTimeout)) {
  128. log_debug(LD_BUG,
  129. "circuit_build_times_min_circs_to_observe() called, cbtmincircs"
  130. " is %d",
  131. num);
  132. }
  133. return num;
  134. }
  135. /** Return true iff <b>cbt</b> has recorded enough build times that we
  136. * want to start acting on the timeout it implies. */
  137. int
  138. circuit_build_times_enough_to_compute(circuit_build_times_t *cbt)
  139. {
  140. return cbt->total_build_times >= circuit_build_times_min_circs_to_observe();
  141. }
  142. /**
  143. * Retrieve and bounds-check the cbtquantile consensus paramter.
  144. *
  145. * Effect: This is the position on the quantile curve to use to set the
  146. * timeout value. It is a percent (10-99).
  147. */
  148. double
  149. circuit_build_times_quantile_cutoff(void)
  150. {
  151. int32_t num = networkstatus_get_param(NULL, "cbtquantile",
  152. CBT_DEFAULT_QUANTILE_CUTOFF,
  153. CBT_MIN_QUANTILE_CUTOFF,
  154. CBT_MAX_QUANTILE_CUTOFF);
  155. if (!(get_options()->LearnCircuitBuildTimeout)) {
  156. log_debug(LD_BUG,
  157. "circuit_build_times_quantile_cutoff() called, cbtquantile"
  158. " is %d",
  159. num);
  160. }
  161. return num/100.0;
  162. }
  163. /* DOCDOC circuit_build_times_get_bw_scale */
  164. int
  165. circuit_build_times_get_bw_scale(networkstatus_t *ns)
  166. {
  167. return networkstatus_get_param(ns, "bwweightscale",
  168. BW_WEIGHT_SCALE,
  169. BW_MIN_WEIGHT_SCALE,
  170. BW_MAX_WEIGHT_SCALE);
  171. }
  172. /**
  173. * Retrieve and bounds-check the cbtclosequantile consensus paramter.
  174. *
  175. * Effect: This is the position on the quantile curve to use to set the
  176. * timeout value to use to actually close circuits. It is a percent
  177. * (0-99).
  178. */
  179. static double
  180. circuit_build_times_close_quantile(void)
  181. {
  182. int32_t param;
  183. /* Cast is safe - circuit_build_times_quantile_cutoff() is capped */
  184. int32_t min = (int)tor_lround(100*circuit_build_times_quantile_cutoff());
  185. param = networkstatus_get_param(NULL, "cbtclosequantile",
  186. CBT_DEFAULT_CLOSE_QUANTILE,
  187. CBT_MIN_CLOSE_QUANTILE,
  188. CBT_MAX_CLOSE_QUANTILE);
  189. if (!(get_options()->LearnCircuitBuildTimeout)) {
  190. log_debug(LD_BUG,
  191. "circuit_build_times_close_quantile() called, cbtclosequantile"
  192. " is %d", param);
  193. }
  194. if (param < min) {
  195. log_warn(LD_DIR, "Consensus parameter cbtclosequantile is "
  196. "too small, raising to %d", min);
  197. param = min;
  198. }
  199. return param / 100.0;
  200. }
  201. /**
  202. * Retrieve and bounds-check the cbttestfreq consensus paramter.
  203. *
  204. * Effect: Describes how often in seconds to build a test circuit to
  205. * gather timeout values. Only applies if less than 'cbtmincircs'
  206. * have been recorded.
  207. */
  208. static int32_t
  209. circuit_build_times_test_frequency(void)
  210. {
  211. int32_t num = networkstatus_get_param(NULL, "cbttestfreq",
  212. CBT_DEFAULT_TEST_FREQUENCY,
  213. CBT_MIN_TEST_FREQUENCY,
  214. CBT_MAX_TEST_FREQUENCY);
  215. if (!(get_options()->LearnCircuitBuildTimeout)) {
  216. log_debug(LD_BUG,
  217. "circuit_build_times_test_frequency() called, cbttestfreq is %d",
  218. num);
  219. }
  220. return num;
  221. }
  222. /**
  223. * Retrieve and bounds-check the cbtmintimeout consensus parameter.
  224. *
  225. * Effect: This is the minimum allowed timeout value in milliseconds.
  226. * The minimum is to prevent rounding to 0 (we only check once
  227. * per second).
  228. */
  229. static int32_t
  230. circuit_build_times_min_timeout(void)
  231. {
  232. int32_t num = networkstatus_get_param(NULL, "cbtmintimeout",
  233. CBT_DEFAULT_TIMEOUT_MIN_VALUE,
  234. CBT_MIN_TIMEOUT_MIN_VALUE,
  235. CBT_MAX_TIMEOUT_MIN_VALUE);
  236. if (!(get_options()->LearnCircuitBuildTimeout)) {
  237. log_debug(LD_BUG,
  238. "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
  239. num);
  240. }
  241. return num;
  242. }
  243. /**
  244. * Retrieve and bounds-check the cbtinitialtimeout consensus paramter.
  245. *
  246. * Effect: This is the timeout value to use before computing a timeout,
  247. * in milliseconds.
  248. */
  249. int32_t
  250. circuit_build_times_initial_timeout(void)
  251. {
  252. int32_t min = circuit_build_times_min_timeout();
  253. int32_t param = networkstatus_get_param(NULL, "cbtinitialtimeout",
  254. CBT_DEFAULT_TIMEOUT_INITIAL_VALUE,
  255. CBT_MIN_TIMEOUT_INITIAL_VALUE,
  256. CBT_MAX_TIMEOUT_INITIAL_VALUE);
  257. if (!(get_options()->LearnCircuitBuildTimeout)) {
  258. log_debug(LD_BUG,
  259. "circuit_build_times_initial_timeout() called, "
  260. "cbtinitialtimeout is %d",
  261. param);
  262. }
  263. if (param < min) {
  264. log_warn(LD_DIR, "Consensus parameter cbtinitialtimeout is too small, "
  265. "raising to %d", min);
  266. param = min;
  267. }
  268. return param;
  269. }
  270. /**
  271. * Retrieve and bounds-check the cbtrecentcount consensus paramter.
  272. *
  273. * Effect: This is the number of circuit build times to keep track of
  274. * for deciding if we hit cbtmaxtimeouts and need to reset our state
  275. * and learn a new timeout.
  276. */
  277. static int32_t
  278. circuit_build_times_recent_circuit_count(networkstatus_t *ns)
  279. {
  280. int32_t num;
  281. num = networkstatus_get_param(ns, "cbtrecentcount",
  282. CBT_DEFAULT_RECENT_CIRCUITS,
  283. CBT_MIN_RECENT_CIRCUITS,
  284. CBT_MAX_RECENT_CIRCUITS);
  285. if (!(get_options()->LearnCircuitBuildTimeout)) {
  286. log_debug(LD_BUG,
  287. "circuit_build_times_recent_circuit_count() called, "
  288. "cbtrecentcount is %d",
  289. num);
  290. }
  291. return num;
  292. }
  293. /**
  294. * This function is called when we get a consensus update.
  295. *
  296. * It checks to see if we have changed any consensus parameters
  297. * that require reallocation or discard of previous stats.
  298. */
  299. void
  300. circuit_build_times_new_consensus_params(circuit_build_times_t *cbt,
  301. networkstatus_t *ns)
  302. {
  303. int32_t num;
  304. /*
  305. * First check if we're doing adaptive timeouts at all; nothing to
  306. * update if we aren't.
  307. */
  308. if (!circuit_build_times_disabled()) {
  309. num = circuit_build_times_recent_circuit_count(ns);
  310. if (num > 0) {
  311. if (num != cbt->liveness.num_recent_circs) {
  312. int8_t *recent_circs;
  313. log_notice(LD_CIRC, "The Tor Directory Consensus has changed how many "
  314. "circuits we must track to detect network failures from %d "
  315. "to %d.", cbt->liveness.num_recent_circs, num);
  316. tor_assert(cbt->liveness.timeouts_after_firsthop ||
  317. cbt->liveness.num_recent_circs == 0);
  318. /*
  319. * Technically this is a circular array that we are reallocating
  320. * and memcopying. However, since it only consists of either 1s
  321. * or 0s, and is only used in a statistical test to determine when
  322. * we should discard our history after a sufficient number of 1's
  323. * have been reached, it is fine if order is not preserved or
  324. * elements are lost.
  325. *
  326. * cbtrecentcount should only be changing in cases of severe network
  327. * distress anyway, so memory correctness here is paramount over
  328. * doing acrobatics to preserve the array.
  329. */
  330. recent_circs = tor_malloc_zero(sizeof(int8_t)*num);
  331. if (cbt->liveness.timeouts_after_firsthop &&
  332. cbt->liveness.num_recent_circs > 0) {
  333. memcpy(recent_circs, cbt->liveness.timeouts_after_firsthop,
  334. sizeof(int8_t)*MIN(num, cbt->liveness.num_recent_circs));
  335. }
  336. // Adjust the index if it needs it.
  337. if (num < cbt->liveness.num_recent_circs) {
  338. cbt->liveness.after_firsthop_idx = MIN(num-1,
  339. cbt->liveness.after_firsthop_idx);
  340. }
  341. tor_free(cbt->liveness.timeouts_after_firsthop);
  342. cbt->liveness.timeouts_after_firsthop = recent_circs;
  343. cbt->liveness.num_recent_circs = num;
  344. }
  345. /* else no change, nothing to do */
  346. } else { /* num == 0 */
  347. /*
  348. * Weird. This probably shouldn't happen, so log a warning, but try
  349. * to do something sensible anyway.
  350. */
  351. log_warn(LD_CIRC,
  352. "The cbtrecentcircs consensus parameter came back zero! "
  353. "This disables adaptive timeouts since we can't keep track of "
  354. "any recent circuits.");
  355. circuit_build_times_free_timeouts(cbt);
  356. }
  357. } else {
  358. /*
  359. * Adaptive timeouts are disabled; this might be because of the
  360. * LearnCircuitBuildTimes config parameter, and hence permanent, or
  361. * the cbtdisabled consensus parameter, so it may be a new condition.
  362. * Treat it like getting num == 0 above and free the circuit history
  363. * if we have any.
  364. */
  365. circuit_build_times_free_timeouts(cbt);
  366. }
  367. }
  368. /**
  369. * Return the initial default or configured timeout in milliseconds
  370. */
  371. static double
  372. circuit_build_times_get_initial_timeout(void)
  373. {
  374. double timeout;
  375. /*
  376. * Check if we have LearnCircuitBuildTimeout, and if we don't,
  377. * always use CircuitBuildTimeout, no questions asked.
  378. */
  379. if (get_options()->LearnCircuitBuildTimeout) {
  380. if (!unit_tests && get_options()->CircuitBuildTimeout) {
  381. timeout = get_options()->CircuitBuildTimeout*1000;
  382. if (timeout < circuit_build_times_min_timeout()) {
  383. log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
  384. circuit_build_times_min_timeout()/1000);
  385. timeout = circuit_build_times_min_timeout();
  386. }
  387. } else {
  388. timeout = circuit_build_times_initial_timeout();
  389. }
  390. } else {
  391. timeout = get_options()->CircuitBuildTimeout*1000;
  392. }
  393. return timeout;
  394. }
  395. /**
  396. * Reset the build time state.
  397. *
  398. * Leave estimated parameters, timeout and network liveness intact
  399. * for future use.
  400. */
  401. void
  402. circuit_build_times_reset(circuit_build_times_t *cbt)
  403. {
  404. memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times));
  405. cbt->total_build_times = 0;
  406. cbt->build_times_idx = 0;
  407. cbt->have_computed_timeout = 0;
  408. }
  409. /**
  410. * Initialize the buildtimes structure for first use.
  411. *
  412. * Sets the initial timeout values based on either the config setting,
  413. * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
  414. */
  415. void
  416. circuit_build_times_init(circuit_build_times_t *cbt)
  417. {
  418. memset(cbt, 0, sizeof(*cbt));
  419. /*
  420. * Check if we really are using adaptive timeouts, and don't keep
  421. * track of this stuff if not.
  422. */
  423. if (!circuit_build_times_disabled()) {
  424. cbt->liveness.num_recent_circs =
  425. circuit_build_times_recent_circuit_count(NULL);
  426. cbt->liveness.timeouts_after_firsthop =
  427. tor_malloc_zero(sizeof(int8_t)*cbt->liveness.num_recent_circs);
  428. } else {
  429. cbt->liveness.num_recent_circs = 0;
  430. cbt->liveness.timeouts_after_firsthop = NULL;
  431. }
  432. cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout();
  433. control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
  434. }
  435. /**
  436. * Free the saved timeouts, if the cbtdisabled consensus parameter got turned
  437. * on or something.
  438. */
  439. void
  440. circuit_build_times_free_timeouts(circuit_build_times_t *cbt)
  441. {
  442. if (!cbt) return;
  443. if (cbt->liveness.timeouts_after_firsthop) {
  444. tor_free(cbt->liveness.timeouts_after_firsthop);
  445. }
  446. cbt->liveness.num_recent_circs = 0;
  447. }
  448. #if 0
  449. /**
  450. * Rewind our build time history by n positions.
  451. */
  452. static void
  453. circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
  454. {
  455. int i = 0;
  456. cbt->build_times_idx -= n;
  457. cbt->build_times_idx %= CBT_NCIRCUITS_TO_OBSERVE;
  458. for (i = 0; i < n; i++) {
  459. cbt->circuit_build_times[(i+cbt->build_times_idx)
  460. %CBT_NCIRCUITS_TO_OBSERVE]=0;
  461. }
  462. if (cbt->total_build_times > n) {
  463. cbt->total_build_times -= n;
  464. } else {
  465. cbt->total_build_times = 0;
  466. }
  467. log_info(LD_CIRC,
  468. "Rewound history by %d places. Current index: %d. "
  469. "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
  470. }
  471. #endif
  472. /**
  473. * Add a new build time value <b>time</b> to the set of build times. Time
  474. * units are milliseconds.
  475. *
  476. * circuit_build_times <b>cbt</b> is a circular array, so loop around when
  477. * array is full.
  478. */
  479. int
  480. circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time)
  481. {
  482. if (time <= 0 || time > CBT_BUILD_TIME_MAX) {
  483. log_warn(LD_BUG, "Circuit build time is too large (%u)."
  484. "This is probably a bug.", time);
  485. tor_fragile_assert();
  486. return -1;
  487. }
  488. log_debug(LD_CIRC, "Adding circuit build time %u", time);
  489. cbt->circuit_build_times[cbt->build_times_idx] = time;
  490. cbt->build_times_idx = (cbt->build_times_idx + 1) % CBT_NCIRCUITS_TO_OBSERVE;
  491. if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
  492. cbt->total_build_times++;
  493. if ((cbt->total_build_times % CBT_SAVE_STATE_EVERY) == 0) {
  494. /* Save state every n circuit builds */
  495. if (!unit_tests && !get_options()->AvoidDiskWrites)
  496. or_state_mark_dirty(get_or_state(), 0);
  497. }
  498. return 0;
  499. }
  500. /**
  501. * Return maximum circuit build time
  502. */
  503. static build_time_t
  504. circuit_build_times_max(circuit_build_times_t *cbt)
  505. {
  506. int i = 0;
  507. build_time_t max_build_time = 0;
  508. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  509. if (cbt->circuit_build_times[i] > max_build_time
  510. && cbt->circuit_build_times[i] != CBT_BUILD_ABANDONED)
  511. max_build_time = cbt->circuit_build_times[i];
  512. }
  513. return max_build_time;
  514. }
  515. #if 0
  516. /** Return minimum circuit build time */
  517. build_time_t
  518. circuit_build_times_min(circuit_build_times_t *cbt)
  519. {
  520. int i = 0;
  521. build_time_t min_build_time = CBT_BUILD_TIME_MAX;
  522. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  523. if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
  524. cbt->circuit_build_times[i] < min_build_time)
  525. min_build_time = cbt->circuit_build_times[i];
  526. }
  527. if (min_build_time == CBT_BUILD_TIME_MAX) {
  528. log_warn(LD_CIRC, "No build times less than CBT_BUILD_TIME_MAX!");
  529. }
  530. return min_build_time;
  531. }
  532. #endif
  533. /**
  534. * Calculate and return a histogram for the set of build times.
  535. *
  536. * Returns an allocated array of histrogram bins representing
  537. * the frequency of index*CBT_BIN_WIDTH millisecond
  538. * build times. Also outputs the number of bins in nbins.
  539. *
  540. * The return value must be freed by the caller.
  541. */
  542. static uint32_t *
  543. circuit_build_times_create_histogram(circuit_build_times_t *cbt,
  544. build_time_t *nbins)
  545. {
  546. uint32_t *histogram;
  547. build_time_t max_build_time = circuit_build_times_max(cbt);
  548. int i, c;
  549. *nbins = 1 + (max_build_time / CBT_BIN_WIDTH);
  550. histogram = tor_malloc_zero(*nbins * sizeof(build_time_t));
  551. // calculate histogram
  552. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  553. if (cbt->circuit_build_times[i] == 0
  554. || cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
  555. continue; /* 0 <-> uninitialized */
  556. c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH);
  557. histogram[c]++;
  558. }
  559. return histogram;
  560. }
  561. /**
  562. * Return the Pareto start-of-curve parameter Xm.
  563. *
  564. * Because we are not a true Pareto curve, we compute this as the
  565. * weighted average of the N most frequent build time bins. N is either
  566. * 1 if we don't have enough circuit build time data collected, or
  567. * determined by the consensus parameter cbtnummodes (default 3).
  568. */
  569. static build_time_t
  570. circuit_build_times_get_xm(circuit_build_times_t *cbt)
  571. {
  572. build_time_t i, nbins;
  573. build_time_t *nth_max_bin;
  574. int32_t bin_counts=0;
  575. build_time_t ret = 0;
  576. uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
  577. int n=0;
  578. int num_modes = circuit_build_times_default_num_xm_modes();
  579. tor_assert(nbins > 0);
  580. tor_assert(num_modes > 0);
  581. // Only use one mode if < 1000 buildtimes. Not enough data
  582. // for multiple.
  583. if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
  584. num_modes = 1;
  585. nth_max_bin = (build_time_t*)tor_malloc_zero(num_modes*sizeof(build_time_t));
  586. /* Determine the N most common build times */
  587. for (i = 0; i < nbins; i++) {
  588. if (histogram[i] >= histogram[nth_max_bin[0]]) {
  589. nth_max_bin[0] = i;
  590. }
  591. for (n = 1; n < num_modes; n++) {
  592. if (histogram[i] >= histogram[nth_max_bin[n]] &&
  593. (!histogram[nth_max_bin[n-1]]
  594. || histogram[i] < histogram[nth_max_bin[n-1]])) {
  595. nth_max_bin[n] = i;
  596. }
  597. }
  598. }
  599. for (n = 0; n < num_modes; n++) {
  600. bin_counts += histogram[nth_max_bin[n]];
  601. ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
  602. log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]),
  603. histogram[nth_max_bin[n]]);
  604. }
  605. /* The following assert is safe, because we don't get called when we
  606. * haven't observed at least CBT_MIN_MIN_CIRCUITS_TO_OBSERVE circuits. */
  607. tor_assert(bin_counts > 0);
  608. ret /= bin_counts;
  609. tor_free(histogram);
  610. tor_free(nth_max_bin);
  611. return ret;
  612. }
  613. /**
  614. * Output a histogram of current circuit build times to
  615. * the or_state_t state structure.
  616. */
  617. void
  618. circuit_build_times_update_state(circuit_build_times_t *cbt,
  619. or_state_t *state)
  620. {
  621. uint32_t *histogram;
  622. build_time_t i = 0;
  623. build_time_t nbins = 0;
  624. config_line_t **next, *line;
  625. histogram = circuit_build_times_create_histogram(cbt, &nbins);
  626. // write to state
  627. config_free_lines(state->BuildtimeHistogram);
  628. next = &state->BuildtimeHistogram;
  629. *next = NULL;
  630. state->TotalBuildTimes = cbt->total_build_times;
  631. state->CircuitBuildAbandonedCount = 0;
  632. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  633. if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
  634. state->CircuitBuildAbandonedCount++;
  635. }
  636. for (i = 0; i < nbins; i++) {
  637. // compress the histogram by skipping the blanks
  638. if (histogram[i] == 0) continue;
  639. *next = line = tor_malloc_zero(sizeof(config_line_t));
  640. line->key = tor_strdup("CircuitBuildTimeBin");
  641. tor_asprintf(&line->value, "%d %d",
  642. CBT_BIN_TO_MS(i), histogram[i]);
  643. next = &(line->next);
  644. }
  645. if (!unit_tests) {
  646. if (!get_options()->AvoidDiskWrites)
  647. or_state_mark_dirty(get_or_state(), 0);
  648. }
  649. tor_free(histogram);
  650. }
  651. /**
  652. * Shuffle the build times array.
  653. *
  654. * Adapted from http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
  655. */
  656. static void
  657. circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt,
  658. build_time_t *raw_times,
  659. uint32_t num_times)
  660. {
  661. uint32_t n = num_times;
  662. if (num_times > CBT_NCIRCUITS_TO_OBSERVE) {
  663. log_notice(LD_CIRC, "The number of circuit times that this Tor version "
  664. "uses to calculate build times is less than the number stored "
  665. "in your state file. Decreasing the circuit time history from "
  666. "%lu to %d.", (unsigned long)num_times,
  667. CBT_NCIRCUITS_TO_OBSERVE);
  668. }
  669. if (n > INT_MAX-1) {
  670. log_warn(LD_CIRC, "For some insane reasons, you had %lu circuit build "
  671. "observations in your state file. That's far too many; probably "
  672. "there's a bug here.", (unsigned long)n);
  673. n = INT_MAX-1;
  674. }
  675. /* This code can only be run on a compact array */
  676. while (n-- > 1) {
  677. int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */
  678. build_time_t tmp = raw_times[k];
  679. raw_times[k] = raw_times[n];
  680. raw_times[n] = tmp;
  681. }
  682. /* Since the times are now shuffled, take a random CBT_NCIRCUITS_TO_OBSERVE
  683. * subset (ie the first CBT_NCIRCUITS_TO_OBSERVE values) */
  684. for (n = 0; n < MIN(num_times, CBT_NCIRCUITS_TO_OBSERVE); n++) {
  685. circuit_build_times_add_time(cbt, raw_times[n]);
  686. }
  687. }
  688. /**
  689. * Filter old synthetic timeouts that were created before the
  690. * new right-censored Pareto calculation was deployed.
  691. *
  692. * Once all clients before 0.2.1.13-alpha are gone, this code
  693. * will be unused.
  694. */
  695. static int
  696. circuit_build_times_filter_timeouts(circuit_build_times_t *cbt)
  697. {
  698. int num_filtered=0, i=0;
  699. double timeout_rate = 0;
  700. build_time_t max_timeout = 0;
  701. timeout_rate = circuit_build_times_timeout_rate(cbt);
  702. max_timeout = (build_time_t)cbt->close_ms;
  703. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  704. if (cbt->circuit_build_times[i] > max_timeout) {
  705. build_time_t replaced = cbt->circuit_build_times[i];
  706. num_filtered++;
  707. cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED;
  708. log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced,
  709. cbt->circuit_build_times[i]);
  710. }
  711. }
  712. log_info(LD_CIRC,
  713. "We had %d timeouts out of %d build times, "
  714. "and filtered %d above the max of %u",
  715. (int)(cbt->total_build_times*timeout_rate),
  716. cbt->total_build_times, num_filtered, max_timeout);
  717. return num_filtered;
  718. }
  719. /**
  720. * Load histogram from <b>state</b>, shuffling the resulting array
  721. * after we do so. Use this result to estimate parameters and
  722. * calculate the timeout.
  723. *
  724. * Return -1 on error.
  725. */
  726. int
  727. circuit_build_times_parse_state(circuit_build_times_t *cbt,
  728. or_state_t *state)
  729. {
  730. int tot_values = 0;
  731. uint32_t loaded_cnt = 0, N = 0;
  732. config_line_t *line;
  733. unsigned int i;
  734. build_time_t *loaded_times;
  735. int err = 0;
  736. circuit_build_times_init(cbt);
  737. if (circuit_build_times_disabled()) {
  738. return 0;
  739. }
  740. /* build_time_t 0 means uninitialized */
  741. loaded_times = tor_malloc_zero(sizeof(build_time_t)*state->TotalBuildTimes);
  742. for (line = state->BuildtimeHistogram; line; line = line->next) {
  743. smartlist_t *args = smartlist_new();
  744. smartlist_split_string(args, line->value, " ",
  745. SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
  746. if (smartlist_len(args) < 2) {
  747. log_warn(LD_GENERAL, "Unable to parse circuit build times: "
  748. "Too few arguments to CircuitBuildTime");
  749. err = 1;
  750. SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
  751. smartlist_free(args);
  752. break;
  753. } else {
  754. const char *ms_str = smartlist_get(args,0);
  755. const char *count_str = smartlist_get(args,1);
  756. uint32_t count, k;
  757. build_time_t ms;
  758. int ok;
  759. ms = (build_time_t)tor_parse_ulong(ms_str, 0, 0,
  760. CBT_BUILD_TIME_MAX, &ok, NULL);
  761. if (!ok) {
  762. log_warn(LD_GENERAL, "Unable to parse circuit build times: "
  763. "Unparsable bin number");
  764. err = 1;
  765. SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
  766. smartlist_free(args);
  767. break;
  768. }
  769. count = (uint32_t)tor_parse_ulong(count_str, 0, 0,
  770. UINT32_MAX, &ok, NULL);
  771. if (!ok) {
  772. log_warn(LD_GENERAL, "Unable to parse circuit build times: "
  773. "Unparsable bin count");
  774. err = 1;
  775. SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
  776. smartlist_free(args);
  777. break;
  778. }
  779. if (loaded_cnt+count+state->CircuitBuildAbandonedCount
  780. > state->TotalBuildTimes) {
  781. log_warn(LD_CIRC,
  782. "Too many build times in state file. "
  783. "Stopping short before %d",
  784. loaded_cnt+count);
  785. SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
  786. smartlist_free(args);
  787. break;
  788. }
  789. for (k = 0; k < count; k++) {
  790. loaded_times[loaded_cnt++] = ms;
  791. }
  792. N++;
  793. SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
  794. smartlist_free(args);
  795. }
  796. }
  797. log_info(LD_CIRC,
  798. "Adding %d timeouts.", state->CircuitBuildAbandonedCount);
  799. for (i=0; i < state->CircuitBuildAbandonedCount; i++) {
  800. loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED;
  801. }
  802. if (loaded_cnt != state->TotalBuildTimes) {
  803. log_warn(LD_CIRC,
  804. "Corrupt state file? Build times count mismatch. "
  805. "Read %d times, but file says %d", loaded_cnt,
  806. state->TotalBuildTimes);
  807. err = 1;
  808. circuit_build_times_reset(cbt);
  809. goto done;
  810. }
  811. circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
  812. /* Verify that we didn't overwrite any indexes */
  813. for (i=0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  814. if (!cbt->circuit_build_times[i])
  815. break;
  816. tot_values++;
  817. }
  818. log_info(LD_CIRC,
  819. "Loaded %d/%d values from %d lines in circuit time histogram",
  820. tot_values, cbt->total_build_times, N);
  821. if (cbt->total_build_times != tot_values
  822. || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) {
  823. log_warn(LD_CIRC,
  824. "Corrupt state file? Shuffled build times mismatch. "
  825. "Read %d times, but file says %d", tot_values,
  826. state->TotalBuildTimes);
  827. err = 1;
  828. circuit_build_times_reset(cbt);
  829. goto done;
  830. }
  831. circuit_build_times_set_timeout(cbt);
  832. if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) {
  833. circuit_build_times_filter_timeouts(cbt);
  834. }
  835. done:
  836. tor_free(loaded_times);
  837. return err ? -1 : 0;
  838. }
  839. /**
  840. * Estimates the Xm and Alpha parameters using
  841. * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
  842. *
  843. * The notable difference is that we use mode instead of min to estimate Xm.
  844. * This is because our distribution is frechet-like. We claim this is
  845. * an acceptable approximation because we are only concerned with the
  846. * accuracy of the CDF of the tail.
  847. */
  848. int
  849. circuit_build_times_update_alpha(circuit_build_times_t *cbt)
  850. {
  851. build_time_t *x=cbt->circuit_build_times;
  852. double a = 0;
  853. int n=0,i=0,abandoned_count=0;
  854. build_time_t max_time=0;
  855. /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
  856. /* We sort of cheat here and make our samples slightly more pareto-like
  857. * and less frechet-like. */
  858. cbt->Xm = circuit_build_times_get_xm(cbt);
  859. tor_assert(cbt->Xm > 0);
  860. for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) {
  861. if (!x[i]) {
  862. continue;
  863. }
  864. if (x[i] < cbt->Xm) {
  865. a += tor_mathlog(cbt->Xm);
  866. } else if (x[i] == CBT_BUILD_ABANDONED) {
  867. abandoned_count++;
  868. } else {
  869. a += tor_mathlog(x[i]);
  870. if (x[i] > max_time)
  871. max_time = x[i];
  872. }
  873. n++;
  874. }
  875. /*
  876. * We are erring and asserting here because this can only happen
  877. * in codepaths other than startup. The startup state parsing code
  878. * performs this same check, and resets state if it hits it. If we
  879. * hit it at runtime, something serious has gone wrong.
  880. */
  881. if (n!=cbt->total_build_times) {
  882. log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
  883. cbt->total_build_times);
  884. }
  885. tor_assert(n==cbt->total_build_times);
  886. if (max_time <= 0) {
  887. /* This can happen if Xm is actually the *maximum* value in the set.
  888. * It can also happen if we've abandoned every single circuit somehow.
  889. * In either case, tell the caller not to compute a new build timeout. */
  890. log_warn(LD_BUG,
  891. "Could not determine largest build time (%d). "
  892. "Xm is %dms and we've abandoned %d out of %d circuits.", max_time,
  893. cbt->Xm, abandoned_count, n);
  894. return 0;
  895. }
  896. a += abandoned_count*tor_mathlog(max_time);
  897. a -= n*tor_mathlog(cbt->Xm);
  898. // Estimator comes from Eq #4 in:
  899. // "Bayesian estimation based on trimmed samples from Pareto populations"
  900. // by Arturo J. Fernández. We are right-censored only.
  901. a = (n-abandoned_count)/a;
  902. cbt->alpha = a;
  903. return 1;
  904. }
  905. /**
  906. * This is the Pareto Quantile Function. It calculates the point x
  907. * in the distribution such that F(x) = quantile (ie quantile*100%
  908. * of the mass of the density function is below x on the curve).
  909. *
  910. * We use it to calculate the timeout and also to generate synthetic
  911. * values of time for circuits that timeout before completion.
  912. *
  913. * See http://en.wikipedia.org/wiki/Quantile_function,
  914. * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
  915. * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_
  916. * random_sample_from_Pareto_distribution
  917. * That's right. I'll cite wikipedia all day long.
  918. *
  919. * Return value is in milliseconds.
  920. */
  921. double
  922. circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
  923. double quantile)
  924. {
  925. double ret;
  926. tor_assert(quantile >= 0);
  927. tor_assert(1.0-quantile > 0);
  928. tor_assert(cbt->Xm > 0);
  929. ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha);
  930. if (ret > INT32_MAX) {
  931. ret = INT32_MAX;
  932. }
  933. tor_assert(ret > 0);
  934. return ret;
  935. }
  936. /** Pareto CDF */
  937. double
  938. circuit_build_times_cdf(circuit_build_times_t *cbt, double x)
  939. {
  940. double ret;
  941. tor_assert(cbt->Xm > 0);
  942. ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
  943. tor_assert(0 <= ret && ret <= 1.0);
  944. return ret;
  945. }
  946. /**
  947. * Generate a synthetic time using our distribution parameters.
  948. *
  949. * The return value will be within the [q_lo, q_hi) quantile points
  950. * on the CDF.
  951. */
  952. build_time_t
  953. circuit_build_times_generate_sample(circuit_build_times_t *cbt,
  954. double q_lo, double q_hi)
  955. {
  956. double randval = crypto_rand_double();
  957. build_time_t ret;
  958. double u;
  959. /* Generate between [q_lo, q_hi) */
  960. /*XXXX This is what nextafter is supposed to be for; we should use it on the
  961. * platforms that support it. */
  962. q_hi -= 1.0/(INT32_MAX);
  963. tor_assert(q_lo >= 0);
  964. tor_assert(q_hi < 1);
  965. tor_assert(q_lo < q_hi);
  966. u = q_lo + (q_hi-q_lo)*randval;
  967. tor_assert(0 <= u && u < 1.0);
  968. /* circuit_build_times_calculate_timeout returns <= INT32_MAX */
  969. ret = (build_time_t)
  970. tor_lround(circuit_build_times_calculate_timeout(cbt, u));
  971. tor_assert(ret > 0);
  972. return ret;
  973. }
  974. /**
  975. * Estimate an initial alpha parameter by solving the quantile
  976. * function with a quantile point and a specific timeout value.
  977. */
  978. void
  979. circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
  980. double quantile, double timeout_ms)
  981. {
  982. // Q(u) = Xm/((1-u)^(1/a))
  983. // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
  984. // CircBuildTimeout = Xm/((1-0.8))^(1/a))
  985. // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
  986. // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
  987. // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
  988. tor_assert(quantile >= 0);
  989. tor_assert(cbt->Xm > 0);
  990. cbt->alpha = tor_mathlog(1.0-quantile)/
  991. (tor_mathlog(cbt->Xm)-tor_mathlog(timeout_ms));
  992. tor_assert(cbt->alpha > 0);
  993. }
  994. /**
  995. * Returns true if we need circuits to be built
  996. */
  997. int
  998. circuit_build_times_needs_circuits(circuit_build_times_t *cbt)
  999. {
  1000. /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
  1001. return !circuit_build_times_enough_to_compute(cbt);
  1002. }
  1003. /**
  1004. * Returns true if we should build a timeout test circuit
  1005. * right now.
  1006. */
  1007. int
  1008. circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
  1009. {
  1010. return circuit_build_times_needs_circuits(cbt) &&
  1011. approx_time()-cbt->last_circ_at > circuit_build_times_test_frequency();
  1012. }
  1013. /**
  1014. * Called to indicate that the network showed some signs of liveness,
  1015. * i.e. we received a cell.
  1016. *
  1017. * This is used by circuit_build_times_network_check_live() to decide
  1018. * if we should record the circuit build timeout or not.
  1019. *
  1020. * This function is called every time we receive a cell. Avoid
  1021. * syscalls, events, and other high-intensity work.
  1022. */
  1023. void
  1024. circuit_build_times_network_is_live(circuit_build_times_t *cbt)
  1025. {
  1026. time_t now = approx_time();
  1027. if (cbt->liveness.nonlive_timeouts > 0) {
  1028. log_notice(LD_CIRC,
  1029. "Tor now sees network activity. Restoring circuit build "
  1030. "timeout recording. Network was down for %d seconds "
  1031. "during %d circuit attempts.",
  1032. (int)(now - cbt->liveness.network_last_live),
  1033. cbt->liveness.nonlive_timeouts);
  1034. }
  1035. cbt->liveness.network_last_live = now;
  1036. cbt->liveness.nonlive_timeouts = 0;
  1037. }
  1038. /**
  1039. * Called to indicate that we completed a circuit. Because this circuit
  1040. * succeeded, it doesn't count as a timeout-after-the-first-hop.
  1041. *
  1042. * This is used by circuit_build_times_network_check_changed() to determine
  1043. * if we had too many recent timeouts and need to reset our learned timeout
  1044. * to something higher.
  1045. */
  1046. void
  1047. circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
  1048. {
  1049. /* Check for NULLness because we might not be using adaptive timeouts */
  1050. if (cbt->liveness.timeouts_after_firsthop &&
  1051. cbt->liveness.num_recent_circs > 0) {
  1052. cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]
  1053. = 0;
  1054. cbt->liveness.after_firsthop_idx++;
  1055. cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
  1056. }
  1057. }
  1058. /**
  1059. * A circuit just timed out. If it failed after the first hop, record it
  1060. * in our history for later deciding if the network speed has changed.
  1061. *
  1062. * This is used by circuit_build_times_network_check_changed() to determine
  1063. * if we had too many recent timeouts and need to reset our learned timeout
  1064. * to something higher.
  1065. */
  1066. static void
  1067. circuit_build_times_network_timeout(circuit_build_times_t *cbt,
  1068. int did_onehop)
  1069. {
  1070. /* Check for NULLness because we might not be using adaptive timeouts */
  1071. if (cbt->liveness.timeouts_after_firsthop &&
  1072. cbt->liveness.num_recent_circs > 0) {
  1073. if (did_onehop) {
  1074. cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]
  1075. = 1;
  1076. cbt->liveness.after_firsthop_idx++;
  1077. cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
  1078. }
  1079. }
  1080. }
  1081. /**
  1082. * A circuit was just forcibly closed. If there has been no recent network
  1083. * activity at all, but this circuit was launched back when we thought the
  1084. * network was live, increment the number of "nonlive" circuit timeouts.
  1085. *
  1086. * This is used by circuit_build_times_network_check_live() to decide
  1087. * if we should record the circuit build timeout or not.
  1088. */
  1089. static void
  1090. circuit_build_times_network_close(circuit_build_times_t *cbt,
  1091. int did_onehop, time_t start_time)
  1092. {
  1093. time_t now = time(NULL);
  1094. /*
  1095. * Check if this is a timeout that was for a circuit that spent its
  1096. * entire existence during a time where we have had no network activity.
  1097. */
  1098. if (cbt->liveness.network_last_live < start_time) {
  1099. if (did_onehop) {
  1100. char last_live_buf[ISO_TIME_LEN+1];
  1101. char start_time_buf[ISO_TIME_LEN+1];
  1102. char now_buf[ISO_TIME_LEN+1];
  1103. format_local_iso_time(last_live_buf, cbt->liveness.network_last_live);
  1104. format_local_iso_time(start_time_buf, start_time);
  1105. format_local_iso_time(now_buf, now);
  1106. log_warn(LD_BUG,
  1107. "Circuit somehow completed a hop while the network was "
  1108. "not live. Network was last live at %s, but circuit launched "
  1109. "at %s. It's now %s.", last_live_buf, start_time_buf,
  1110. now_buf);
  1111. }
  1112. cbt->liveness.nonlive_timeouts++;
  1113. if (cbt->liveness.nonlive_timeouts == 1) {
  1114. log_notice(LD_CIRC,
  1115. "Tor has not observed any network activity for the past %d "
  1116. "seconds. Disabling circuit build timeout recording.",
  1117. (int)(now - cbt->liveness.network_last_live));
  1118. } else {
  1119. log_info(LD_CIRC,
  1120. "Got non-live timeout. Current count is: %d",
  1121. cbt->liveness.nonlive_timeouts);
  1122. }
  1123. }
  1124. }
  1125. /**
  1126. * When the network is not live, we do not record circuit build times.
  1127. *
  1128. * The network is considered not live if there has been at least one
  1129. * circuit build that began and ended (had its close_ms measurement
  1130. * period expire) since we last received a cell.
  1131. *
  1132. * Also has the side effect of rewinding the circuit time history
  1133. * in the case of recent liveness changes.
  1134. */
  1135. int
  1136. circuit_build_times_network_check_live(circuit_build_times_t *cbt)
  1137. {
  1138. if (cbt->liveness.nonlive_timeouts > 0) {
  1139. return 0;
  1140. }
  1141. return 1;
  1142. }
  1143. /**
  1144. * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
  1145. * the past RECENT_CIRCUITS time out after the first hop. Used to detect
  1146. * if the network connection has changed significantly, and if so,
  1147. * resets our circuit build timeout to the default.
  1148. *
  1149. * Also resets the entire timeout history in this case and causes us
  1150. * to restart the process of building test circuits and estimating a
  1151. * new timeout.
  1152. */
  1153. int
  1154. circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
  1155. {
  1156. int total_build_times = cbt->total_build_times;
  1157. int timeout_count=0;
  1158. int i;
  1159. if (cbt->liveness.timeouts_after_firsthop &&
  1160. cbt->liveness.num_recent_circs > 0) {
  1161. /* how many of our recent circuits made it to the first hop but then
  1162. * timed out? */
  1163. for (i = 0; i < cbt->liveness.num_recent_circs; i++) {
  1164. timeout_count += cbt->liveness.timeouts_after_firsthop[i];
  1165. }
  1166. }
  1167. /* If 80% of our recent circuits are timing out after the first hop,
  1168. * we need to re-estimate a new initial alpha and timeout. */
  1169. if (timeout_count < circuit_build_times_max_timeouts()) {
  1170. return 0;
  1171. }
  1172. circuit_build_times_reset(cbt);
  1173. if (cbt->liveness.timeouts_after_firsthop &&
  1174. cbt->liveness.num_recent_circs > 0) {
  1175. memset(cbt->liveness.timeouts_after_firsthop, 0,
  1176. sizeof(*cbt->liveness.timeouts_after_firsthop)*
  1177. cbt->liveness.num_recent_circs);
  1178. }
  1179. cbt->liveness.after_firsthop_idx = 0;
  1180. /* Check to see if this has happened before. If so, double the timeout
  1181. * to give people on abysmally bad network connections a shot at access */
  1182. if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) {
  1183. if (cbt->timeout_ms > INT32_MAX/2 || cbt->close_ms > INT32_MAX/2) {
  1184. log_warn(LD_CIRC, "Insanely large circuit build timeout value. "
  1185. "(timeout = %fmsec, close = %fmsec)",
  1186. cbt->timeout_ms, cbt->close_ms);
  1187. } else {
  1188. cbt->timeout_ms *= 2;
  1189. cbt->close_ms *= 2;
  1190. }
  1191. } else {
  1192. cbt->close_ms = cbt->timeout_ms
  1193. = circuit_build_times_get_initial_timeout();
  1194. }
  1195. control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
  1196. log_notice(LD_CIRC,
  1197. "Your network connection speed appears to have changed. Resetting "
  1198. "timeout to %lds after %d timeouts and %d buildtimes.",
  1199. tor_lround(cbt->timeout_ms/1000), timeout_count,
  1200. total_build_times);
  1201. return 1;
  1202. }
  1203. /**
  1204. * Count the number of timeouts in a set of cbt data.
  1205. */
  1206. double
  1207. circuit_build_times_timeout_rate(const circuit_build_times_t *cbt)
  1208. {
  1209. int i=0,timeouts=0;
  1210. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  1211. if (cbt->circuit_build_times[i] >= cbt->timeout_ms) {
  1212. timeouts++;
  1213. }
  1214. }
  1215. if (!cbt->total_build_times)
  1216. return 0;
  1217. return ((double)timeouts)/cbt->total_build_times;
  1218. }
  1219. /**
  1220. * Count the number of closed circuits in a set of cbt data.
  1221. */
  1222. double
  1223. circuit_build_times_close_rate(const circuit_build_times_t *cbt)
  1224. {
  1225. int i=0,closed=0;
  1226. for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
  1227. if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) {
  1228. closed++;
  1229. }
  1230. }
  1231. if (!cbt->total_build_times)
  1232. return 0;
  1233. return ((double)closed)/cbt->total_build_times;
  1234. }
  1235. /**
  1236. * Store a timeout as a synthetic value.
  1237. *
  1238. * Returns true if the store was successful and we should possibly
  1239. * update our timeout estimate.
  1240. */
  1241. int
  1242. circuit_build_times_count_close(circuit_build_times_t *cbt,
  1243. int did_onehop,
  1244. time_t start_time)
  1245. {
  1246. if (circuit_build_times_disabled()) {
  1247. cbt->close_ms = cbt->timeout_ms
  1248. = circuit_build_times_get_initial_timeout();
  1249. return 0;
  1250. }
  1251. /* Record this force-close to help determine if the network is dead */
  1252. circuit_build_times_network_close(cbt, did_onehop, start_time);
  1253. /* Only count timeouts if network is live.. */
  1254. if (!circuit_build_times_network_check_live(cbt)) {
  1255. return 0;
  1256. }
  1257. circuit_build_times_add_time(cbt, CBT_BUILD_ABANDONED);
  1258. return 1;
  1259. }
  1260. /**
  1261. * Update timeout counts to determine if we need to expire
  1262. * our build time history due to excessive timeouts.
  1263. *
  1264. * We do not record any actual time values at this stage;
  1265. * we are only interested in recording the fact that a timeout
  1266. * happened. We record the time values via
  1267. * circuit_build_times_count_close() and circuit_build_times_add_time().
  1268. */
  1269. void
  1270. circuit_build_times_count_timeout(circuit_build_times_t *cbt,
  1271. int did_onehop)
  1272. {
  1273. if (circuit_build_times_disabled()) {
  1274. cbt->close_ms = cbt->timeout_ms
  1275. = circuit_build_times_get_initial_timeout();
  1276. return;
  1277. }
  1278. /* Register the fact that a timeout just occurred. */
  1279. circuit_build_times_network_timeout(cbt, did_onehop);
  1280. /* If there are a ton of timeouts, we should reset
  1281. * the circuit build timeout. */
  1282. circuit_build_times_network_check_changed(cbt);
  1283. }
  1284. /**
  1285. * Estimate a new timeout based on history and set our timeout
  1286. * variable accordingly.
  1287. */
  1288. static int
  1289. circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
  1290. {
  1291. build_time_t max_time;
  1292. if (!circuit_build_times_enough_to_compute(cbt))
  1293. return 0;
  1294. if (!circuit_build_times_update_alpha(cbt))
  1295. return 0;
  1296. cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt,
  1297. circuit_build_times_quantile_cutoff());
  1298. cbt->close_ms = circuit_build_times_calculate_timeout(cbt,
  1299. circuit_build_times_close_quantile());
  1300. max_time = circuit_build_times_max(cbt);
  1301. /* Sometimes really fast guard nodes give us such a steep curve
  1302. * that this ends up being not that much greater than timeout_ms.
  1303. * Make it be at least 1 min to handle this case. */
  1304. cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
  1305. if (cbt->timeout_ms > max_time) {
  1306. log_info(LD_CIRC,
  1307. "Circuit build timeout of %dms is beyond the maximum build "
  1308. "time we have ever observed. Capping it to %dms.",
  1309. (int)cbt->timeout_ms, max_time);
  1310. cbt->timeout_ms = max_time;
  1311. }
  1312. if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
  1313. log_info(LD_CIRC,
  1314. "Circuit build measurement period of %dms is more than twice "
  1315. "the maximum build time we have ever observed. Capping it to "
  1316. "%dms.", (int)cbt->close_ms, 2*max_time);
  1317. cbt->close_ms = 2*max_time;
  1318. }
  1319. cbt->have_computed_timeout = 1;
  1320. return 1;
  1321. }
  1322. /**
  1323. * Exposed function to compute a new timeout. Dispatches events and
  1324. * also filters out extremely high timeout values.
  1325. */
  1326. void
  1327. circuit_build_times_set_timeout(circuit_build_times_t *cbt)
  1328. {
  1329. long prev_timeout = tor_lround(cbt->timeout_ms/1000);
  1330. double timeout_rate;
  1331. /*
  1332. * Just return if we aren't using adaptive timeouts
  1333. */
  1334. if (circuit_build_times_disabled())
  1335. return;
  1336. if (!circuit_build_times_set_timeout_worker(cbt))
  1337. return;
  1338. if (cbt->timeout_ms < circuit_build_times_min_timeout()) {
  1339. log_info(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
  1340. cbt->timeout_ms, circuit_build_times_min_timeout());
  1341. cbt->timeout_ms = circuit_build_times_min_timeout();
  1342. if (cbt->close_ms < cbt->timeout_ms) {
  1343. /* This shouldn't happen because of MAX() in timeout_worker above,
  1344. * but doing it just in case */
  1345. cbt->close_ms = circuit_build_times_initial_timeout();
  1346. }
  1347. }
  1348. control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED);
  1349. timeout_rate = circuit_build_times_timeout_rate(cbt);
  1350. if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) {
  1351. log_info(LD_CIRC,
  1352. "Based on %d circuit times, it looks like we don't need to "
  1353. "wait so long for circuits to finish. We will now assume a "
  1354. "circuit is too slow to use after waiting %ld seconds.",
  1355. cbt->total_build_times,
  1356. tor_lround(cbt->timeout_ms/1000));
  1357. log_info(LD_CIRC,
  1358. "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
  1359. cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
  1360. timeout_rate);
  1361. } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) {
  1362. log_info(LD_CIRC,
  1363. "Based on %d circuit times, it looks like we need to wait "
  1364. "longer for circuits to finish. We will now assume a "
  1365. "circuit is too slow to use after waiting %ld seconds.",
  1366. cbt->total_build_times,
  1367. tor_lround(cbt->timeout_ms/1000));
  1368. log_info(LD_CIRC,
  1369. "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
  1370. cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
  1371. timeout_rate);
  1372. } else {
  1373. log_info(LD_CIRC,
  1374. "Set circuit build timeout to %lds (%fms, %fms, Xm: %d, a: %f,"
  1375. " r: %f) based on %d circuit times",
  1376. tor_lround(cbt->timeout_ms/1000),
  1377. cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate,
  1378. cbt->total_build_times);
  1379. }
  1380. }
  1381. /** Make a note that we're running unit tests (rather than running Tor
  1382. * itself), so we avoid clobbering our state file. */
  1383. void
  1384. circuitbuild_running_unit_tests(void)
  1385. {
  1386. unit_tests = 1;
  1387. }