consdiff.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415
  1. /* Copyright (c) 2014, Daniel Martí
  2. * Copyright (c) 2014, The Tor Project, Inc. */
  3. /* See LICENSE for licensing information */
  4. /**
  5. * \file consdiff.c
  6. * \brief Consensus diff implementation, including both the generation and the
  7. * application of diffs in a minimal ed format.
  8. *
  9. * The consensus diff application is done in consdiff_apply_diff, which relies
  10. * on apply_ed_diff for the main ed diff part and on some digest helper
  11. * functions to check the digest hashes found in the consensus diff header.
  12. *
  13. * The consensus diff generation is more complex. consdiff_gen_diff generates
  14. * it, relying on gen_ed_diff to generate the ed diff and some digest helper
  15. * functions to generate the digest hashes.
  16. *
  17. * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
  18. * time and linear space to generate an ed diff given two smartlists. As shown
  19. * in its comment section, calling calc_changes on the entire two consensuses
  20. * will calculate what is to be added and what is to be deleted in the diff.
  21. * Its comment section briefly explains how it works.
  22. *
  23. * In our case specific to consensuses, we take advantage of the fact that
  24. * consensuses list routers sorted by their identities. We use that
  25. * information to avoid running calc_changes on the whole smartlists.
  26. * gen_ed_diff will navigate through the two consensuses identity by identity
  27. * and will send small couples of slices to calc_changes, keeping the running
  28. * time near-linear. This is explained in more detail in the gen_ed_diff
  29. * comments.
  30. *
  31. * The allocation strategy tries to save time and memory by avoiding needless
  32. * copies. Instead of actually splitting the inputs into separate strings, we
  33. * allocate cdline_t objects, each of which represents a line in the original
  34. * object or in the output. We use memarea_t allocators to manage the
  35. * temporary memory we use when generating or applying diffs.
  36. **/
  37. #define CONSDIFF_PRIVATE
  38. #include "or.h"
  39. #include "consdiff.h"
  40. #include "memarea.h"
  41. #include "routerparse.h"
  42. static const char* ns_diff_version = "network-status-diff-version 1";
  43. static const char* hash_token = "hash";
  44. static char *consensus_join_lines(const smartlist_t *inp);
  45. /** Return true iff a and b have the same contents. */
  46. STATIC int
  47. lines_eq(const cdline_t *a, const cdline_t *b)
  48. {
  49. return a->len == b->len && fast_memeq(a->s, b->s, a->len);
  50. }
  51. /** Return true iff a has the same contents as the nul-terminated string b. */
  52. STATIC int
  53. line_str_eq(const cdline_t *a, const char *b)
  54. {
  55. const size_t len = strlen(b);
  56. tor_assert(len <= UINT32_MAX);
  57. cdline_t bline = { b, (uint32_t)len };
  58. return lines_eq(a, &bline);
  59. }
  60. /** Return true iff a begins with the same contents as the nul-terminated
  61. * string b. */
  62. static int
  63. line_starts_with_str(const cdline_t *a, const char *b)
  64. {
  65. const size_t len = strlen(b);
  66. tor_assert(len <= UINT32_MAX);
  67. return a->len >= len && fast_memeq(a->s, b, len);
  68. }
  69. /** Return a new cdline_t holding as its contents the nul-terminated
  70. * string s. Use the provided memory area for storage. */
  71. static cdline_t *
  72. cdline_linecpy(memarea_t *area, const char *s)
  73. {
  74. size_t len = strlen(s);
  75. const char *ss = memarea_memdup(area, s, len);
  76. cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
  77. line->s = ss;
  78. line->len = (uint32_t)len;
  79. return line;
  80. }
  81. /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
  82. * string s. Use the provided memory area for storage. */
  83. STATIC void
  84. smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
  85. {
  86. smartlist_add(lst, cdline_linecpy(area, s));
  87. }
  88. /** Compute the digest of <b>cons</b>, and store the result in
  89. * <b>digest_out</b>. Return 0 on success, -1 on failure. */
  90. /* This is a separate, mockable function so that we can override it when
  91. * fuzzing. */
  92. MOCK_IMPL(STATIC int,
  93. consensus_compute_digest,(const char *cons,
  94. consensus_digest_t *digest_out))
  95. {
  96. int r = crypto_digest256((char*)digest_out->sha3_256,
  97. cons, strlen(cons), DIGEST_SHA3_256);
  98. return r;
  99. }
  100. /** Compute the digest-as-signed of <b>cons</b>, and store the result in
  101. * <b>digest_out</b>. Return 0 on success, -1 on failure. */
  102. /* This is a separate, mockable function so that we can override it when
  103. * fuzzing. */
  104. MOCK_IMPL(STATIC int,
  105. consensus_compute_digest_as_signed,(const char *cons,
  106. consensus_digest_t *digest_out))
  107. {
  108. return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
  109. cons);
  110. }
  111. /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
  112. /* This is a separate, mockable function so that we can override it when
  113. * fuzzing. */
  114. MOCK_IMPL(STATIC int,
  115. consensus_digest_eq,(const uint8_t *d1,
  116. const uint8_t *d2))
  117. {
  118. return fast_memeq(d1, d2, DIGEST256_LEN);
  119. }
  120. /** Create (allocate) a new slice from a smartlist. Assumes that the start
  121. * and the end indexes are within the bounds of the initial smartlist. The end
  122. * element is not part of the resulting slice. If end is -1, the slice is to
  123. * reach the end of the smartlist.
  124. */
  125. STATIC smartlist_slice_t *
  126. smartlist_slice(const smartlist_t *list, int start, int end)
  127. {
  128. int list_len = smartlist_len(list);
  129. tor_assert(start >= 0);
  130. tor_assert(start <= list_len);
  131. if (end == -1) {
  132. end = list_len;
  133. }
  134. tor_assert(start <= end);
  135. smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
  136. slice->list = list;
  137. slice->offset = start;
  138. slice->len = end - start;
  139. return slice;
  140. }
  141. /** Helper: Compute the longest common subsequence lengths for the two slices.
  142. * Used as part of the diff generation to find the column at which to split
  143. * slice2 while still having the optimal solution.
  144. * If direction is -1, the navigation is reversed. Otherwise it must be 1.
  145. * The length of the resulting integer array is that of the second slice plus
  146. * one.
  147. */
  148. STATIC int *
  149. lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
  150. int direction)
  151. {
  152. size_t a_size = sizeof(int) * (slice2->len+1);
  153. /* Resulting lcs lengths. */
  154. int *result = tor_malloc_zero(a_size);
  155. /* Copy of the lcs lengths from the last iteration. */
  156. int *prev = tor_malloc(a_size);
  157. tor_assert(direction == 1 || direction == -1);
  158. int si = slice1->offset;
  159. if (direction == -1) {
  160. si += (slice1->len-1);
  161. }
  162. for (int i = 0; i < slice1->len; ++i, si+=direction) {
  163. const cdline_t *line1 = smartlist_get(slice1->list, si);
  164. /* Store the last results. */
  165. memcpy(prev, result, a_size);
  166. int sj = slice2->offset;
  167. if (direction == -1) {
  168. sj += (slice2->len-1);
  169. }
  170. for (int j = 0; j < slice2->len; ++j, sj+=direction) {
  171. const cdline_t *line2 = smartlist_get(slice2->list, sj);
  172. if (lines_eq(line1, line2)) {
  173. /* If the lines are equal, the lcs is one line longer. */
  174. result[j + 1] = prev[j] + 1;
  175. } else {
  176. /* If not, see what lcs parent path is longer. */
  177. result[j + 1] = MAX(result[j], prev[j + 1]);
  178. }
  179. }
  180. }
  181. tor_free(prev);
  182. return result;
  183. }
  184. /** Helper: Trim any number of lines that are equally at the start or the end
  185. * of both slices.
  186. */
  187. STATIC void
  188. trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
  189. {
  190. while (slice1->len>0 && slice2->len>0) {
  191. const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
  192. const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
  193. if (!lines_eq(line1, line2)) {
  194. break;
  195. }
  196. slice1->offset++; slice1->len--;
  197. slice2->offset++; slice2->len--;
  198. }
  199. int i1 = (slice1->offset+slice1->len)-1;
  200. int i2 = (slice2->offset+slice2->len)-1;
  201. while (slice1->len>0 && slice2->len>0) {
  202. const cdline_t *line1 = smartlist_get(slice1->list, i1);
  203. const cdline_t *line2 = smartlist_get(slice2->list, i2);
  204. if (!lines_eq(line1, line2)) {
  205. break;
  206. }
  207. i1--;
  208. slice1->len--;
  209. i2--;
  210. slice2->len--;
  211. }
  212. }
  213. /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
  214. * bounds of the slice.
  215. */
  216. STATIC int
  217. smartlist_slice_string_pos(const smartlist_slice_t *slice,
  218. const cdline_t *string)
  219. {
  220. int end = slice->offset + slice->len;
  221. for (int i = slice->offset; i < end; ++i) {
  222. const cdline_t *el = smartlist_get(slice->list, i);
  223. if (lines_eq(el, string)) {
  224. return i;
  225. }
  226. }
  227. return -1;
  228. }
  229. /** Helper: Set all the appropriate changed booleans to true. The first slice
  230. * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
  231. * present in the other slice will be set to changed in their bool array.
  232. * The two changed bool arrays are passed in the same order as the slices.
  233. */
  234. STATIC void
  235. set_changed(bitarray_t *changed1, bitarray_t *changed2,
  236. const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
  237. {
  238. int toskip = -1;
  239. tor_assert(slice1->len == 0 || slice1->len == 1);
  240. if (slice1->len == 1) {
  241. const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
  242. toskip = smartlist_slice_string_pos(slice2, line_common);
  243. if (toskip == -1) {
  244. bitarray_set(changed1, slice1->offset);
  245. }
  246. }
  247. int end = slice2->offset + slice2->len;
  248. for (int i = slice2->offset; i < end; ++i) {
  249. if (i != toskip) {
  250. bitarray_set(changed2, i);
  251. }
  252. }
  253. }
  254. /*
  255. * Helper: Given that slice1 has been split by half into top and bot, we want
  256. * to fetch the column at which to split slice2 so that we are still on track
  257. * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
  258. * since the shortest diff is just another way to say the longest common
  259. * subsequence.
  260. */
  261. static int
  262. optimal_column_to_split(const smartlist_slice_t *top,
  263. const smartlist_slice_t *bot,
  264. const smartlist_slice_t *slice2)
  265. {
  266. int *lens_top = lcs_lengths(top, slice2, 1);
  267. int *lens_bot = lcs_lengths(bot, slice2, -1);
  268. int column=0, max_sum=-1;
  269. for (int i = 0; i < slice2->len+1; ++i) {
  270. int sum = lens_top[i] + lens_bot[slice2->len-i];
  271. if (sum > max_sum) {
  272. column = i;
  273. max_sum = sum;
  274. }
  275. }
  276. tor_free(lens_top);
  277. tor_free(lens_bot);
  278. return column;
  279. }
  280. /**
  281. * Helper: Figure out what elements are new or gone on the second smartlist
  282. * relative to the first smartlist, and store the booleans in the bitarrays.
  283. * True on the first bitarray means the element is gone, true on the second
  284. * bitarray means it's new.
  285. *
  286. * In its base case, either of the smartlists is of length <= 1 and we can
  287. * quickly see what elements are new or are gone. In the other case, we will
  288. * split one smartlist by half and we'll use optimal_column_to_split to find
  289. * the optimal column at which to split the second smartlist so that we are
  290. * finding the smallest diff possible.
  291. */
  292. STATIC void
  293. calc_changes(smartlist_slice_t *slice1,
  294. smartlist_slice_t *slice2,
  295. bitarray_t *changed1, bitarray_t *changed2)
  296. {
  297. trim_slices(slice1, slice2);
  298. if (slice1->len <= 1) {
  299. set_changed(changed1, changed2, slice1, slice2);
  300. } else if (slice2->len <= 1) {
  301. set_changed(changed2, changed1, slice2, slice1);
  302. /* Keep on splitting the slices in two. */
  303. } else {
  304. smartlist_slice_t *top, *bot, *left, *right;
  305. /* Split the first slice in half. */
  306. int mid = slice1->len/2;
  307. top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
  308. bot = smartlist_slice(slice1->list, slice1->offset+mid,
  309. slice1->offset+slice1->len);
  310. /* Split the second slice by the optimal column. */
  311. int mid2 = optimal_column_to_split(top, bot, slice2);
  312. left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
  313. right = smartlist_slice(slice2->list, slice2->offset+mid2,
  314. slice2->offset+slice2->len);
  315. calc_changes(top, left, changed1, changed2);
  316. calc_changes(bot, right, changed1, changed2);
  317. tor_free(top);
  318. tor_free(bot);
  319. tor_free(left);
  320. tor_free(right);
  321. }
  322. }
  323. /* This table is from crypto.c. The SP and PAD defines are different. */
  324. #define NOT_VALID_BASE64 255
  325. #define X NOT_VALID_BASE64
  326. #define SP NOT_VALID_BASE64
  327. #define PAD NOT_VALID_BASE64
  328. static const uint8_t base64_compare_table[256] = {
  329. X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
  330. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  331. SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
  332. 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
  333. X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  334. 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
  335. X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  336. 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
  337. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  338. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  339. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  340. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  341. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  342. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  343. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  344. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  345. };
  346. /** Helper: Get the identity hash from a router line, assuming that the line
  347. * at least appears to be a router line and thus starts with "r ".
  348. *
  349. * If an identity hash is found, store it (without decoding it) in
  350. * <b>hash_out</b>, and return 0. On failure, return -1.
  351. */
  352. STATIC int
  353. get_id_hash(const cdline_t *line, cdline_t *hash_out)
  354. {
  355. if (line->len < 2)
  356. return -1;
  357. /* Skip the router name. */
  358. const char *hash = memchr(line->s + 2, ' ', line->len - 2);
  359. if (!hash) {
  360. return -1;
  361. }
  362. hash++;
  363. const char *hash_end = hash;
  364. /* Stop when the first non-base64 character is found. Use unsigned chars to
  365. * avoid negative indexes causing crashes.
  366. */
  367. while (base64_compare_table[*((unsigned char*)hash_end)]
  368. != NOT_VALID_BASE64 &&
  369. hash_end < line->s + line->len) {
  370. hash_end++;
  371. }
  372. /* Empty hash. */
  373. if (hash_end == hash) {
  374. return -1;
  375. }
  376. hash_out->s = hash;
  377. /* Always true because lines are limited to this length */
  378. tor_assert(hash_end >= hash);
  379. tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
  380. hash_out->len = (uint32_t)(hash_end - hash);
  381. return 0;
  382. }
  383. /** Helper: Check that a line is a valid router entry. We must at least be
  384. * able to fetch a proper identity hash from it for it to be valid.
  385. */
  386. STATIC int
  387. is_valid_router_entry(const cdline_t *line)
  388. {
  389. if (line->len < 2 || fast_memneq(line->s, "r ", 2))
  390. return 0;
  391. cdline_t tmp;
  392. return (get_id_hash(line, &tmp) == 0);
  393. }
  394. /** Helper: Find the next router line starting at the current position.
  395. * Assumes that cur is lower than the length of the smartlist, i.e. it is a
  396. * line within the bounds of the consensus. The only exception is when we
  397. * don't want to skip the first line, in which case cur will be -1.
  398. */
  399. STATIC int
  400. next_router(const smartlist_t *cons, int cur)
  401. {
  402. int len = smartlist_len(cons);
  403. tor_assert(cur >= -1 && cur < len);
  404. if (++cur >= len) {
  405. return len;
  406. }
  407. const cdline_t *line = smartlist_get(cons, cur);
  408. while (!is_valid_router_entry(line)) {
  409. if (++cur >= len) {
  410. return len;
  411. }
  412. line = smartlist_get(cons, cur);
  413. }
  414. return cur;
  415. }
  416. /** Helper: compare two base64-encoded identity hashes, which may be of
  417. * different lengths. Comparison ends when the first non-base64 char is found.
  418. */
  419. STATIC int
  420. base64cmp(const cdline_t *hash1, const cdline_t *hash2)
  421. {
  422. /* NULL is always lower, useful for last_hash which starts at NULL. */
  423. if (!hash1->s && !hash2->s) {
  424. return 0;
  425. }
  426. if (!hash1->s) {
  427. return -1;
  428. }
  429. if (!hash2->s) {
  430. return 1;
  431. }
  432. /* Don't index with a char; char may be signed. */
  433. const unsigned char *a = (unsigned char*)hash1->s;
  434. const unsigned char *b = (unsigned char*)hash2->s;
  435. const unsigned char *a_end = a + hash1->len;
  436. const unsigned char *b_end = b + hash2->len;
  437. while (1) {
  438. uint8_t av = base64_compare_table[*a];
  439. uint8_t bv = base64_compare_table[*b];
  440. if (av == NOT_VALID_BASE64) {
  441. if (bv == NOT_VALID_BASE64) {
  442. /* Both ended with exactly the same characters. */
  443. return 0;
  444. } else {
  445. /* hash2 goes on longer than hash1 and thus hash1 is lower. */
  446. return -1;
  447. }
  448. } else if (bv == NOT_VALID_BASE64) {
  449. /* hash1 goes on longer than hash2 and thus hash1 is greater. */
  450. return 1;
  451. } else if (av < bv) {
  452. /* The first difference shows that hash1 is lower. */
  453. return -1;
  454. } else if (av > bv) {
  455. /* The first difference shows that hash1 is greater. */
  456. return 1;
  457. } else {
  458. a++;
  459. b++;
  460. if (a == a_end) {
  461. if (b == b_end) {
  462. return 0;
  463. } else {
  464. return -1;
  465. }
  466. } else if (b == b_end) {
  467. return 1;
  468. }
  469. }
  470. }
  471. }
  472. /** Structure used to remember the previous and current identity hash of
  473. * the "r " lines in a consensus, to enforce well-ordering. */
  474. typedef struct router_id_iterator_t {
  475. cdline_t last_hash;
  476. cdline_t hash;
  477. } router_id_iterator_t;
  478. /**
  479. * Initializer for a router_id_iterator_t.
  480. */
  481. #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
  482. /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
  483. * the index to the next router line ("r ...") in the consensus, or to
  484. * an index one after the end of the list if there is no such line.
  485. *
  486. * Use <b>iter</b> to record the hash of the found router line, if any,
  487. * and to enforce ordering on the hashes. If the hashes are mis-ordered,
  488. * return -1. Else, return 0.
  489. **/
  490. static int
  491. find_next_router_line(const smartlist_t *cons,
  492. const char *consname,
  493. int *idxp,
  494. router_id_iterator_t *iter)
  495. {
  496. *idxp = next_router(cons, *idxp);
  497. if (*idxp < smartlist_len(cons)) {
  498. memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
  499. if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
  500. base64cmp(&iter->hash, &iter->last_hash) <= 0) {
  501. log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
  502. "the %s consensus doesn't have its router entries sorted "
  503. "properly.", consname);
  504. return -1;
  505. }
  506. }
  507. return 0;
  508. }
  509. /** Line-prefix indicating the beginning of the signatures section that we
  510. * intend to delete. */
  511. #define START_OF_SIGNATURES_SECTION "directory-signature "
  512. /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
  513. * to remove the signatures from it. If the footer is removed, return a
  514. * cdline_t containing a delete command to delete the footer, allocated in
  515. * <b>area</>. If no footer is removed, return NULL.
  516. *
  517. * We remove the signatures here because they are not themselves signed, and
  518. * as such there might be different encodings for them.
  519. */
  520. static cdline_t *
  521. preprocess_consensus(memarea_t *area,
  522. smartlist_t *cons)
  523. {
  524. int idx;
  525. int dirsig_idx = -1;
  526. for (idx = 0; idx < smartlist_len(cons); ++idx) {
  527. cdline_t *line = smartlist_get(cons, idx);
  528. if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
  529. dirsig_idx = idx;
  530. break;
  531. }
  532. }
  533. if (dirsig_idx >= 0) {
  534. char buf[64];
  535. while (smartlist_len(cons) > dirsig_idx)
  536. smartlist_del(cons, dirsig_idx);
  537. tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
  538. return cdline_linecpy(area, buf);
  539. } else {
  540. return NULL;
  541. }
  542. }
  543. /** Generate an ed diff as a smartlist from two consensuses, also given as
  544. * smartlists. Will return NULL if the diff could not be generated, which can
  545. * happen if any lines the script had to add matched "." or if the routers
  546. * were not properly ordered.
  547. *
  548. * All cdline_t objects in the resulting object are either references to lines
  549. * in one of the inputs, or are newly allocated lines in the provided memarea.
  550. *
  551. * This implementation is consensus-specific. To generate an ed diff for any
  552. * given input in quadratic time, you can replace all the code until the
  553. * navigation in reverse order with the following:
  554. *
  555. * int len1 = smartlist_len(cons1);
  556. * int len2 = smartlist_len(cons2);
  557. * bitarray_t *changed1 = bitarray_init_zero(len1);
  558. * bitarray_t *changed2 = bitarray_init_zero(len2);
  559. * cons1_sl = smartlist_slice(cons1, 0, -1);
  560. * cons2_sl = smartlist_slice(cons2, 0, -1);
  561. * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
  562. */
  563. STATIC smartlist_t *
  564. gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
  565. memarea_t *area)
  566. {
  567. smartlist_t *cons1 = smartlist_new();
  568. smartlist_add_all(cons1, cons1_orig);
  569. cdline_t *remove_trailer = preprocess_consensus(area, cons1);
  570. int len1 = smartlist_len(cons1);
  571. int len2 = smartlist_len(cons2);
  572. smartlist_t *result = smartlist_new();
  573. if (remove_trailer) {
  574. /* There's a delete-the-trailer line at the end, so add it here. */
  575. smartlist_add(result, remove_trailer);
  576. }
  577. /* Initialize the changed bitarrays to zero, so that calc_changes only needs
  578. * to set the ones that matter and leave the rest untouched.
  579. */
  580. bitarray_t *changed1 = bitarray_init_zero(len1);
  581. bitarray_t *changed2 = bitarray_init_zero(len2);
  582. int i1=-1, i2=-1;
  583. int start1=0, start2=0;
  584. /* To check that hashes are ordered properly */
  585. router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
  586. router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
  587. /* i1 and i2 are initialized at the first line of each consensus. They never
  588. * reach past len1 and len2 respectively, since next_router doesn't let that
  589. * happen. i1 and i2 are advanced by at least one line at each iteration as
  590. * long as they have not yet reached len1 and len2, so the loop is
  591. * guaranteed to end, and each pair of (i1,i2) will be inspected at most
  592. * once.
  593. */
  594. while (i1 < len1 || i2 < len2) {
  595. /* Advance each of the two navigation positions by one router entry if not
  596. * yet at the end.
  597. */
  598. if (i1 < len1) {
  599. if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
  600. goto error_cleanup;
  601. }
  602. }
  603. if (i2 < len2) {
  604. if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
  605. goto error_cleanup;
  606. }
  607. }
  608. /* If we have reached the end of both consensuses, there is no need to
  609. * compare hashes anymore, since this is the last iteration.
  610. */
  611. if (i1 < len1 || i2 < len2) {
  612. /* Keep on advancing the lower (by identity hash sorting) position until
  613. * we have two matching positions. The only other possible outcome is
  614. * that a lower position reaches the end of the consensus before it can
  615. * reach a hash that is no longer the lower one. Since there will always
  616. * be a lower hash for as long as the loop runs, one of the two indexes
  617. * will always be incremented, thus assuring that the loop must end
  618. * after a finite number of iterations. If that cannot be because said
  619. * consensus has already reached the end, both are extended to their
  620. * respecting ends since we are done.
  621. */
  622. int cmp = base64cmp(&iter1.hash, &iter2.hash);
  623. while (cmp != 0) {
  624. if (i1 < len1 && cmp < 0) {
  625. if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
  626. goto error_cleanup;
  627. }
  628. if (i1 == len1) {
  629. /* We finished the first consensus, so grab all the remaining
  630. * lines of the second consensus and finish up.
  631. */
  632. i2 = len2;
  633. break;
  634. }
  635. } else if (i2 < len2 && cmp > 0) {
  636. if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
  637. goto error_cleanup;
  638. }
  639. if (i2 == len2) {
  640. /* We finished the second consensus, so grab all the remaining
  641. * lines of the first consensus and finish up.
  642. */
  643. i1 = len1;
  644. break;
  645. }
  646. } else {
  647. i1 = len1;
  648. i2 = len2;
  649. break;
  650. }
  651. cmp = base64cmp(&iter1.hash, &iter2.hash);
  652. }
  653. }
  654. /* Make slices out of these chunks (up to the common router entry) and
  655. * calculate the changes for them.
  656. * Error if any of the two slices are longer than 10K lines. That should
  657. * never happen with any pair of real consensuses. Feeding more than 10K
  658. * lines to calc_changes would be very slow anyway.
  659. */
  660. #define MAX_LINE_COUNT (10000)
  661. if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
  662. log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
  663. "we found too few common router ids.");
  664. goto error_cleanup;
  665. }
  666. smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
  667. smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
  668. calc_changes(cons1_sl, cons2_sl, changed1, changed2);
  669. tor_free(cons1_sl);
  670. tor_free(cons2_sl);
  671. start1 = i1, start2 = i2;
  672. }
  673. /* Navigate the changes in reverse order and generate one ed command for
  674. * each chunk of changes.
  675. */
  676. i1=len1-1, i2=len2-1;
  677. char buf[128];
  678. while (i1 >= 0 || i2 >= 0) {
  679. int start1x, start2x, end1, end2, added, deleted;
  680. /* We are at a point were no changed bools are true, so just keep going. */
  681. if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
  682. !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
  683. if (i1 >= 0) {
  684. i1--;
  685. }
  686. if (i2 >= 0) {
  687. i2--;
  688. }
  689. continue;
  690. }
  691. end1 = i1, end2 = i2;
  692. /* Grab all contiguous changed lines */
  693. while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
  694. i1--;
  695. }
  696. while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
  697. i2--;
  698. }
  699. start1x = i1+1, start2x = i2+1;
  700. added = end2-i2, deleted = end1-i1;
  701. if (added == 0) {
  702. if (deleted == 1) {
  703. tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
  704. smartlist_add_linecpy(result, area, buf);
  705. } else {
  706. tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
  707. smartlist_add_linecpy(result, area, buf);
  708. }
  709. } else {
  710. int i;
  711. if (deleted == 0) {
  712. tor_snprintf(buf, sizeof(buf), "%ia", start1x);
  713. smartlist_add_linecpy(result, area, buf);
  714. } else if (deleted == 1) {
  715. tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
  716. smartlist_add_linecpy(result, area, buf);
  717. } else {
  718. tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
  719. smartlist_add_linecpy(result, area, buf);
  720. }
  721. for (i = start2x; i <= end2; ++i) {
  722. cdline_t *line = smartlist_get(cons2, i);
  723. if (line_str_eq(line, ".")) {
  724. log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
  725. "one of the lines to be added is \".\".");
  726. goto error_cleanup;
  727. }
  728. smartlist_add(result, line);
  729. }
  730. smartlist_add_linecpy(result, area, ".");
  731. }
  732. }
  733. smartlist_free(cons1);
  734. bitarray_free(changed1);
  735. bitarray_free(changed2);
  736. return result;
  737. error_cleanup:
  738. smartlist_free(cons1);
  739. bitarray_free(changed1);
  740. bitarray_free(changed2);
  741. smartlist_free(result);
  742. return NULL;
  743. }
  744. /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
  745. * store it in <b>num_out</b>. Advance <b>s</b> to the characer immediately
  746. * after the number. Return 0 on success, -1 on failure. */
  747. static int
  748. get_linenum(const char **s, int *num_out)
  749. {
  750. int ok;
  751. char *next;
  752. if (!TOR_ISDIGIT(**s)) {
  753. return -1;
  754. }
  755. *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
  756. if (ok && next) {
  757. *s = next;
  758. return 0;
  759. } else {
  760. return -1;
  761. }
  762. }
  763. /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
  764. * and return a new consensus, also as a line-based smartlist. Will return
  765. * NULL if the ed diff is not properly formatted.
  766. *
  767. * All cdline_t objects in the resulting object are references to lines
  768. * in one of the inputs; nothing is copied.
  769. */
  770. STATIC smartlist_t *
  771. apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
  772. int diff_starting_line)
  773. {
  774. int diff_len = smartlist_len(diff);
  775. int j = smartlist_len(cons1);
  776. smartlist_t *cons2 = smartlist_new();
  777. for (int i=diff_starting_line; i<diff_len; ++i) {
  778. const cdline_t *diff_cdline = smartlist_get(diff, i);
  779. char diff_line[128];
  780. if (diff_cdline->len > sizeof(diff_line) - 1) {
  781. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  782. "an ed command was far too long");
  783. goto error_cleanup;
  784. }
  785. /* Copy the line to make it nul-terminated. */
  786. memcpy(diff_line, diff_cdline->s, diff_cdline->len);
  787. diff_line[diff_cdline->len] = 0;
  788. const char *ptr = diff_line;
  789. int start = 0, end = 0;
  790. int had_range = 0;
  791. int end_was_eof = 0;
  792. if (get_linenum(&ptr, &start) < 0) {
  793. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  794. "an ed command was missing a line number.");
  795. goto error_cleanup;
  796. }
  797. if (*ptr == ',') {
  798. /* Two-item range */
  799. had_range = 1;
  800. ++ptr;
  801. if (*ptr == '$') {
  802. end_was_eof = 1;
  803. end = smartlist_len(cons1);
  804. ++ptr;
  805. } else if (get_linenum(&ptr, &end) < 0) {
  806. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  807. "an ed command was missing a range end line number.");
  808. goto error_cleanup;
  809. }
  810. /* Incoherent range. */
  811. if (end <= start) {
  812. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  813. "an invalid range was found in an ed command.");
  814. goto error_cleanup;
  815. }
  816. } else {
  817. /* We'll take <n1> as <n1>,<n1> for simplicity. */
  818. end = start;
  819. }
  820. if (end > j) {
  821. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  822. "its commands are not properly sorted in reverse order.");
  823. goto error_cleanup;
  824. }
  825. if (*ptr == '\0') {
  826. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  827. "a line with no ed command was found");
  828. goto error_cleanup;
  829. }
  830. if (*(ptr+1) != '\0') {
  831. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  832. "an ed command longer than one char was found.");
  833. goto error_cleanup;
  834. }
  835. char action = *ptr;
  836. switch (action) {
  837. case 'a':
  838. case 'c':
  839. case 'd':
  840. break;
  841. default:
  842. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  843. "an unrecognised ed command was found.");
  844. goto error_cleanup;
  845. }
  846. /** $ is not allowed with non-d actions. */
  847. if (end_was_eof && action != 'd') {
  848. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  849. "it wanted to use $ with a command other than delete");
  850. goto error_cleanup;
  851. }
  852. /* 'a' commands are not allowed to have ranges. */
  853. if (had_range && action == 'a') {
  854. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  855. "it wanted to add lines after a range.");
  856. goto error_cleanup;
  857. }
  858. /* Add unchanged lines. */
  859. for (; j && j > end; --j) {
  860. cdline_t *cons_line = smartlist_get(cons1, j-1);
  861. smartlist_add(cons2, cons_line);
  862. }
  863. /* Ignore removed lines. */
  864. if (action == 'c' || action == 'd') {
  865. while (--j >= start) {
  866. /* Skip line */
  867. }
  868. }
  869. /* Add new lines in reverse order, since it will all be reversed at the
  870. * end.
  871. */
  872. if (action == 'a' || action == 'c') {
  873. int added_end = i;
  874. i++; /* Skip the line with the range and command. */
  875. while (i < diff_len) {
  876. if (line_str_eq(smartlist_get(diff, i), ".")) {
  877. break;
  878. }
  879. if (++i == diff_len) {
  880. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  881. "it has lines to be inserted that don't end with a \".\".");
  882. goto error_cleanup;
  883. }
  884. }
  885. int added_i = i-1;
  886. /* It would make no sense to add zero new lines. */
  887. if (added_i == added_end) {
  888. log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
  889. "it has an ed command that tries to insert zero lines.");
  890. goto error_cleanup;
  891. }
  892. while (added_i > added_end) {
  893. cdline_t *added_line = smartlist_get(diff, added_i--);
  894. smartlist_add(cons2, added_line);
  895. }
  896. }
  897. }
  898. /* Add remaining unchanged lines. */
  899. for (; j > 0; --j) {
  900. cdline_t *cons_line = smartlist_get(cons1, j-1);
  901. smartlist_add(cons2, cons_line);
  902. }
  903. /* Reverse the whole thing since we did it from the end. */
  904. smartlist_reverse(cons2);
  905. return cons2;
  906. error_cleanup:
  907. smartlist_free(cons2);
  908. return NULL;
  909. }
  910. /** Generate a consensus diff as a smartlist from two given consensuses, also
  911. * as smartlists. Will return NULL if the consensus diff could not be
  912. * generated. Neither of the two consensuses are modified in any way, so it's
  913. * up to the caller to free their resources.
  914. */
  915. smartlist_t *
  916. consdiff_gen_diff(const smartlist_t *cons1,
  917. const smartlist_t *cons2,
  918. const consensus_digest_t *digests1,
  919. const consensus_digest_t *digests2,
  920. memarea_t *area)
  921. {
  922. smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
  923. /* ed diff could not be generated - reason already logged by gen_ed_diff. */
  924. if (!ed_diff) {
  925. goto error_cleanup;
  926. }
  927. /* See that the script actually produces what we want. */
  928. smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
  929. if (!ed_cons2) {
  930. /* LCOV_EXCL_START -- impossible if diff generation is correct */
  931. log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
  932. "the generated ed diff could not be tested to successfully generate "
  933. "the target consensus.");
  934. goto error_cleanup;
  935. /* LCOV_EXCL_STOP */
  936. }
  937. int cons2_eq = 1;
  938. if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
  939. SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
  940. const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
  941. if (! lines_eq(line1, line2) ) {
  942. cons2_eq = 0;
  943. break;
  944. }
  945. } SMARTLIST_FOREACH_END(line1);
  946. } else {
  947. cons2_eq = 0;
  948. }
  949. smartlist_free(ed_cons2);
  950. if (!cons2_eq) {
  951. /* LCOV_EXCL_START -- impossible if diff generation is correct. */
  952. log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
  953. "the generated ed diff did not generate the target consensus "
  954. "successfully when tested.");
  955. goto error_cleanup;
  956. /* LCOV_EXCL_STOP */
  957. }
  958. char cons1_hash_hex[HEX_DIGEST256_LEN+1];
  959. char cons2_hash_hex[HEX_DIGEST256_LEN+1];
  960. base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
  961. (const char*)digests1->sha3_256, DIGEST256_LEN);
  962. base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
  963. (const char*)digests2->sha3_256, DIGEST256_LEN);
  964. /* Create the resulting consensus diff. */
  965. char buf[160];
  966. smartlist_t *result = smartlist_new();
  967. tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
  968. smartlist_add_linecpy(result, area, buf);
  969. tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
  970. cons1_hash_hex, cons2_hash_hex);
  971. smartlist_add_linecpy(result, area, buf);
  972. smartlist_add_all(result, ed_diff);
  973. smartlist_free(ed_diff);
  974. return result;
  975. error_cleanup:
  976. if (ed_diff) {
  977. /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
  978. smartlist_free(ed_diff);
  979. /* LCOV_EXCL_STOP */
  980. }
  981. return NULL;
  982. }
  983. /** Fetch the digest of the base consensus in the consensus diff, encoded in
  984. * base16 as found in the diff itself. digest1_out and digest2_out must be of
  985. * length DIGEST256_LEN or larger if not NULL.
  986. */
  987. int
  988. consdiff_get_digests(const smartlist_t *diff,
  989. char *digest1_out,
  990. char *digest2_out)
  991. {
  992. smartlist_t *hash_words = NULL;
  993. const cdline_t *format;
  994. char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
  995. char *cons1_hash_hex, *cons2_hash_hex;
  996. if (smartlist_len(diff) < 2) {
  997. log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
  998. goto error_cleanup;
  999. }
  1000. /* Check that it's the format and version we know. */
  1001. format = smartlist_get(diff, 0);
  1002. if (!line_str_eq(format, ns_diff_version)) {
  1003. log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
  1004. goto error_cleanup;
  1005. }
  1006. /* Grab the base16 digests. */
  1007. hash_words = smartlist_new();
  1008. {
  1009. const cdline_t *line2 = smartlist_get(diff, 1);
  1010. char *h = tor_memdup_nulterm(line2->s, line2->len);
  1011. smartlist_split_string(hash_words, h, " ", 0, 0);
  1012. tor_free(h);
  1013. }
  1014. /* There have to be three words, the first of which must be hash_token. */
  1015. if (smartlist_len(hash_words) != 3 ||
  1016. strcmp(smartlist_get(hash_words, 0), hash_token)) {
  1017. log_info(LD_CONSDIFF, "The provided consensus diff does not include "
  1018. "the necessary digests.");
  1019. goto error_cleanup;
  1020. }
  1021. /* Expected hashes as found in the consensus diff header. They must be of
  1022. * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
  1023. * If any of the decodings fail, error to make sure that the hashes are
  1024. * proper base16-encoded digests.
  1025. */
  1026. cons1_hash_hex = smartlist_get(hash_words, 1);
  1027. cons2_hash_hex = smartlist_get(hash_words, 2);
  1028. if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
  1029. strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
  1030. log_info(LD_CONSDIFF, "The provided consensus diff includes "
  1031. "base16-encoded digests of incorrect size.");
  1032. goto error_cleanup;
  1033. }
  1034. if (base16_decode(cons1_hash, DIGEST256_LEN,
  1035. cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
  1036. base16_decode(cons2_hash, DIGEST256_LEN,
  1037. cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
  1038. log_info(LD_CONSDIFF, "The provided consensus diff includes "
  1039. "malformed digests.");
  1040. goto error_cleanup;
  1041. }
  1042. if (digest1_out) {
  1043. memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
  1044. }
  1045. if (digest2_out) {
  1046. memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
  1047. }
  1048. SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
  1049. smartlist_free(hash_words);
  1050. return 0;
  1051. error_cleanup:
  1052. if (hash_words) {
  1053. SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
  1054. smartlist_free(hash_words);
  1055. }
  1056. return 1;
  1057. }
  1058. /** Apply the consensus diff to the given consensus and return a new
  1059. * consensus, also as a line-based smartlist. Will return NULL if the diff
  1060. * could not be applied. Neither the consensus nor the diff are modified in
  1061. * any way, so it's up to the caller to free their resources.
  1062. */
  1063. char *
  1064. consdiff_apply_diff(const smartlist_t *cons1,
  1065. const smartlist_t *diff,
  1066. const consensus_digest_t *digests1)
  1067. {
  1068. smartlist_t *cons2 = NULL;
  1069. char *cons2_str = NULL;
  1070. char e_cons1_hash[DIGEST256_LEN];
  1071. char e_cons2_hash[DIGEST256_LEN];
  1072. if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
  1073. goto error_cleanup;
  1074. }
  1075. /* See that the consensus that was given to us matches its hash. */
  1076. if (!consensus_digest_eq(digests1->sha3_256,
  1077. (const uint8_t*)e_cons1_hash)) {
  1078. char hex_digest1[HEX_DIGEST256_LEN+1];
  1079. char e_hex_digest1[HEX_DIGEST256_LEN+1];
  1080. log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
  1081. "the base consensus doesn't match the digest as found in "
  1082. "the consensus diff header.");
  1083. base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
  1084. (const char *)digests1->sha3_256, DIGEST256_LEN);
  1085. base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
  1086. e_cons1_hash, DIGEST256_LEN);
  1087. log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
  1088. hex_digest1, e_hex_digest1);
  1089. goto error_cleanup;
  1090. }
  1091. /* Grab the ed diff and calculate the resulting consensus. */
  1092. /* Skip the first two lines. */
  1093. cons2 = apply_ed_diff(cons1, diff, 2);
  1094. /* ed diff could not be applied - reason already logged by apply_ed_diff. */
  1095. if (!cons2) {
  1096. goto error_cleanup;
  1097. }
  1098. cons2_str = consensus_join_lines(cons2);
  1099. consensus_digest_t cons2_digests;
  1100. if (consensus_compute_digest(cons2_str, &cons2_digests) < 0) {
  1101. /* LCOV_EXCL_START -- digest can't fail */
  1102. log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
  1103. "resulting from applying a consensus diff.");
  1104. goto error_cleanup;
  1105. /* LCOV_EXCL_STOP */
  1106. }
  1107. /* See that the resulting consensus matches its hash. */
  1108. if (!consensus_digest_eq(cons2_digests.sha3_256,
  1109. (const uint8_t*)e_cons2_hash)) {
  1110. log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
  1111. "the resulting consensus doesn't match the digest as found in "
  1112. "the consensus diff header.");
  1113. char hex_digest2[HEX_DIGEST256_LEN+1];
  1114. char e_hex_digest2[HEX_DIGEST256_LEN+1];
  1115. base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
  1116. (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
  1117. base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
  1118. e_cons2_hash, DIGEST256_LEN);
  1119. log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
  1120. hex_digest2, e_hex_digest2);
  1121. goto error_cleanup;
  1122. }
  1123. goto done;
  1124. error_cleanup:
  1125. tor_free(cons2_str); /* Sets it to NULL */
  1126. done:
  1127. if (cons2) {
  1128. smartlist_free(cons2);
  1129. }
  1130. return cons2_str;
  1131. }
  1132. /** Any consensus line longer than this means that the input is invalid. */
  1133. #define CONSENSUS_LINE_MAX_LEN (1<<20)
  1134. /**
  1135. * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
  1136. * that line (without trailing newline) to <b>out</b>. Return -1 if there are
  1137. * any non-NL terminated lines; 0 otherwise.
  1138. *
  1139. * Unlike tor_split_lines, this function avoids ambiguity on its
  1140. * handling of a final line that isn't NL-terminated.
  1141. *
  1142. * All cdline_t objects are allocated in the provided memarea. Strings
  1143. * are not copied: if <b>s</b> changes or becomes invalid, then all
  1144. * generated cdlines will become invalid.
  1145. */
  1146. STATIC int
  1147. consensus_split_lines(smartlist_t *out, const char *s, memarea_t *area)
  1148. {
  1149. const char *end_of_str = s + strlen(s);
  1150. tor_assert(*end_of_str == '\0');
  1151. while (*s) {
  1152. const char *eol = memchr(s, '\n', end_of_str - s);
  1153. if (!eol) {
  1154. /* File doesn't end with newline. */
  1155. return -1;
  1156. }
  1157. if (eol - s > CONSENSUS_LINE_MAX_LEN) {
  1158. /* Line is far too long. */
  1159. return -1;
  1160. }
  1161. cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
  1162. line->s = s;
  1163. line->len = (uint32_t)(eol - s);
  1164. smartlist_add(out, line);
  1165. s = eol+1;
  1166. }
  1167. return 0;
  1168. }
  1169. /** Given a list of cdline_t, return a newly allocated string containing
  1170. * all of the lines, terminated with NL, concatenated.
  1171. *
  1172. * Unlike smartlist_join_strings(), avoids lossy operations on empty
  1173. * lists. */
  1174. static char *
  1175. consensus_join_lines(const smartlist_t *inp)
  1176. {
  1177. size_t n = 0;
  1178. SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
  1179. n += 1;
  1180. char *result = tor_malloc(n);
  1181. char *out = result;
  1182. SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
  1183. memcpy(out, cdline->s, cdline->len);
  1184. out += cdline->len;
  1185. *out++ = '\n';
  1186. } SMARTLIST_FOREACH_END(cdline);
  1187. *out++ = '\0';
  1188. tor_assert(out == result+n);
  1189. return result;
  1190. }
  1191. /** Given two consensus documents, try to compute a diff between them. On
  1192. * success, retun a newly allocated string containing that diff. On failure,
  1193. * return NULL. */
  1194. char *
  1195. consensus_diff_generate(const char *cons1,
  1196. const char *cons2)
  1197. {
  1198. consensus_digest_t d1, d2;
  1199. smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
  1200. int r1, r2;
  1201. char *result = NULL;
  1202. r1 = consensus_compute_digest_as_signed(cons1, &d1);
  1203. r2 = consensus_compute_digest(cons2, &d2);
  1204. if (BUG(r1 < 0 || r2 < 0))
  1205. return NULL; // LCOV_EXCL_LINE
  1206. memarea_t *area = memarea_new();
  1207. lines1 = smartlist_new();
  1208. lines2 = smartlist_new();
  1209. if (consensus_split_lines(lines1, cons1, area) < 0)
  1210. goto done;
  1211. if (consensus_split_lines(lines2, cons2, area) < 0)
  1212. goto done;
  1213. result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
  1214. done:
  1215. if (result_lines) {
  1216. result = consensus_join_lines(result_lines);
  1217. smartlist_free(result_lines);
  1218. }
  1219. memarea_drop_all(area);
  1220. smartlist_free(lines1);
  1221. smartlist_free(lines2);
  1222. return result;
  1223. }
  1224. /** Given a consensus document and a diff, try to apply the diff to the
  1225. * consensus. On success return a newly allocated string containing the new
  1226. * consensus. On failure, return NULL. */
  1227. char *
  1228. consensus_diff_apply(const char *consensus,
  1229. const char *diff)
  1230. {
  1231. consensus_digest_t d1;
  1232. smartlist_t *lines1 = NULL, *lines2 = NULL;
  1233. int r1;
  1234. char *result = NULL;
  1235. memarea_t *area = memarea_new();
  1236. r1 = consensus_compute_digest_as_signed(consensus, &d1);
  1237. if (BUG(r1 < 0))
  1238. return NULL; // LCOV_EXCL_LINE
  1239. lines1 = smartlist_new();
  1240. lines2 = smartlist_new();
  1241. if (consensus_split_lines(lines1, consensus, area) < 0)
  1242. goto done;
  1243. if (consensus_split_lines(lines2, diff, area) < 0)
  1244. goto done;
  1245. result = consdiff_apply_diff(lines1, lines2, &d1);
  1246. done:
  1247. smartlist_free(lines1);
  1248. smartlist_free(lines2);
  1249. memarea_drop_all(area);
  1250. return result;
  1251. }
  1252. /** Return true iff, based on its header, <b>document</b> is likely
  1253. * to be a consensus diff. */
  1254. int
  1255. looks_like_a_consensus_diff(const char *document, size_t len)
  1256. {
  1257. return (len >= strlen(ns_diff_version) &&
  1258. fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
  1259. }