binascii.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. /* Copyright (c) 2001, Matej Pfajfar.
  2. * Copyright (c) 2001-2004, Roger Dingledine.
  3. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  4. * Copyright (c) 2007-2019, The Tor Project, Inc. */
  5. /* See LICENSE for licensing information */
  6. /**
  7. * \file binascii.c
  8. *
  9. * \brief Miscellaneous functions for encoding and decoding various things
  10. * in base{16,32,64}.
  11. */
  12. #include "orconfig.h"
  13. #include "lib/encoding/binascii.h"
  14. #include "lib/log/log.h"
  15. #include "lib/log/util_bug.h"
  16. #include "lib/cc/torint.h"
  17. #include "lib/string/compat_ctype.h"
  18. #include "lib/intmath/muldiv.h"
  19. #include "lib/malloc/malloc.h"
  20. #include <stddef.h>
  21. #include <string.h>
  22. #include <stdlib.h>
  23. /** Return a pointer to a NUL-terminated hexadecimal string encoding
  24. * the first <b>fromlen</b> bytes of <b>from</b>. (fromlen must be \<= 32.) The
  25. * result does not need to be deallocated, but repeated calls to
  26. * hex_str will trash old results.
  27. */
  28. const char *
  29. hex_str(const char *from, size_t fromlen)
  30. {
  31. static char buf[65];
  32. if (fromlen>(sizeof(buf)-1)/2)
  33. fromlen = (sizeof(buf)-1)/2;
  34. base16_encode(buf,sizeof(buf),from,fromlen);
  35. return buf;
  36. }
  37. /* Return the base32 encoded size in bytes using the source length srclen.
  38. *
  39. * (WATCH OUT: This API counts the terminating NUL byte, but
  40. * base64_encode_size does not.)
  41. */
  42. size_t
  43. base32_encoded_size(size_t srclen)
  44. {
  45. size_t enclen;
  46. tor_assert(srclen < SIZE_T_CEILING / 8);
  47. enclen = BASE32_NOPAD_BUFSIZE(srclen);
  48. tor_assert(enclen < INT_MAX && enclen > srclen);
  49. return enclen;
  50. }
  51. /** Implements base32 encoding as in RFC 4648. */
  52. void
  53. base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
  54. {
  55. unsigned int i, v, u;
  56. size_t nbits = srclen * 8;
  57. size_t bit;
  58. /* We need enough space for the encoded data and the extra NUL byte. */
  59. tor_assert(base32_encoded_size(srclen) <= destlen);
  60. tor_assert(destlen < SIZE_T_CEILING);
  61. /* Make sure we leave no uninitialized data in the destination buffer. */
  62. memset(dest, 0, destlen);
  63. for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
  64. /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
  65. size_t idx = bit / 8;
  66. v = ((uint8_t)src[idx]) << 8;
  67. if (idx+1 < srclen)
  68. v += (uint8_t)src[idx+1];
  69. /* set u to the 5-bit value at the bit'th bit of buf. */
  70. u = (v >> (11-(bit%8))) & 0x1F;
  71. dest[i] = BASE32_CHARS[u];
  72. }
  73. dest[i] = '\0';
  74. }
  75. /** Implements base32 decoding as in RFC 4648.
  76. * Return the number of bytes decoded if successful; -1 otherwise.
  77. */
  78. int
  79. base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  80. {
  81. /* XXXX we might want to rewrite this along the lines of base64_decode, if
  82. * it ever shows up in the profile. */
  83. unsigned int i;
  84. size_t nbits, j, bit;
  85. char *tmp;
  86. nbits = ((srclen * 5) / 8) * 8;
  87. tor_assert(srclen < SIZE_T_CEILING / 5);
  88. tor_assert((nbits/8) <= destlen); /* We need enough space. */
  89. tor_assert(destlen < SIZE_T_CEILING);
  90. /* Make sure we leave no uninitialized data in the destination buffer. */
  91. memset(dest, 0, destlen);
  92. /* Convert base32 encoded chars to the 5-bit values that they represent. */
  93. tmp = tor_malloc_zero(srclen);
  94. for (j = 0; j < srclen; ++j) {
  95. if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
  96. else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
  97. else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
  98. else {
  99. log_warn(LD_GENERAL, "illegal character in base32 encoded string");
  100. tor_free(tmp);
  101. return -1;
  102. }
  103. }
  104. /* Assemble result byte-wise by applying five possible cases. */
  105. for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
  106. switch (bit % 40) {
  107. case 0:
  108. dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
  109. (((uint8_t)tmp[(bit/5)+1]) >> 2);
  110. break;
  111. case 8:
  112. dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
  113. (((uint8_t)tmp[(bit/5)+1]) << 1) +
  114. (((uint8_t)tmp[(bit/5)+2]) >> 4);
  115. break;
  116. case 16:
  117. dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
  118. (((uint8_t)tmp[(bit/5)+1]) >> 1);
  119. break;
  120. case 24:
  121. dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
  122. (((uint8_t)tmp[(bit/5)+1]) << 2) +
  123. (((uint8_t)tmp[(bit/5)+2]) >> 3);
  124. break;
  125. case 32:
  126. dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
  127. ((uint8_t)tmp[(bit/5)+1]);
  128. break;
  129. }
  130. }
  131. memset(tmp, 0, srclen); /* on the heap, this should be safe */
  132. tor_free(tmp);
  133. tmp = NULL;
  134. return i;
  135. }
  136. #define BASE64_OPENSSL_LINELEN 64
  137. /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
  138. * bytes.
  139. *
  140. * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
  141. * but base32_encoded_size does.)
  142. *
  143. * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
  144. * of the encoded output as multiline output (64 character, `\n' terminated
  145. * lines).
  146. */
  147. size_t
  148. base64_encode_size(size_t srclen, int flags)
  149. {
  150. size_t enclen;
  151. /* Use INT_MAX for overflow checking because base64_encode() returns int. */
  152. tor_assert(srclen < INT_MAX);
  153. tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
  154. enclen = BASE64_LEN(srclen);
  155. if (flags & BASE64_ENCODE_MULTILINE)
  156. enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
  157. tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
  158. return enclen;
  159. }
  160. /** Return an upper bound on the number of bytes that might be needed to hold
  161. * the data from decoding the base64 string <b>srclen</b>. This is only an
  162. * upper bound, since some part of the base64 string might be padding or
  163. * space. */
  164. size_t
  165. base64_decode_maxsize(size_t srclen)
  166. {
  167. tor_assert(srclen < INT_MAX / 3);
  168. return CEIL_DIV(srclen * 3, 4);
  169. }
  170. /** Internal table mapping 6 bit values to the Base64 alphabet. */
  171. static const char base64_encode_table[64] = {
  172. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  173. 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  174. 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  175. 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
  176. 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  177. 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
  178. 'w', 'x', 'y', 'z', '0', '1', '2', '3',
  179. '4', '5', '6', '7', '8', '9', '+', '/'
  180. };
  181. /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
  182. * the result into <b>dest</b>, if it will fit within <b>destlen</b>
  183. * bytes. Return the number of bytes written on success; -1 if
  184. * destlen is too short, or other failure.
  185. *
  186. * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
  187. * output in multiline format (64 character, `\n' terminated lines).
  188. */
  189. int
  190. base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
  191. int flags)
  192. {
  193. const unsigned char *usrc = (unsigned char *)src;
  194. const unsigned char *eous = usrc + srclen;
  195. char *d = dest;
  196. uint32_t n = 0;
  197. size_t linelen = 0;
  198. size_t enclen;
  199. int n_idx = 0;
  200. if (!src || !dest)
  201. return -1;
  202. /* Ensure that there is sufficient space, including the NUL. */
  203. enclen = base64_encode_size(srclen, flags);
  204. if (destlen < enclen + 1)
  205. return -1;
  206. if (destlen > SIZE_T_CEILING)
  207. return -1;
  208. if (enclen > INT_MAX)
  209. return -1;
  210. /* Make sure we leave no uninitialized data in the destination buffer. */
  211. memset(dest, 0, destlen);
  212. /* XXX/Yawning: If this ends up being too slow, this can be sped up
  213. * by separating the multiline format case and the normal case, and
  214. * processing 48 bytes of input at a time when newlines are desired.
  215. */
  216. #define ENCODE_CHAR(ch) \
  217. STMT_BEGIN \
  218. *d++ = ch; \
  219. if (flags & BASE64_ENCODE_MULTILINE) { \
  220. if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
  221. linelen = 0; \
  222. *d++ = '\n'; \
  223. } \
  224. } \
  225. STMT_END
  226. #define ENCODE_N(idx) \
  227. ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
  228. #define ENCODE_PAD() ENCODE_CHAR('=')
  229. /* Iterate over all the bytes in src. Each one will add 8 bits to the
  230. * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
  231. * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
  232. */
  233. for ( ; usrc < eous; ++usrc) {
  234. n = (n << 8) | *usrc;
  235. if ((++n_idx) == 3) {
  236. ENCODE_N(0);
  237. ENCODE_N(1);
  238. ENCODE_N(2);
  239. ENCODE_N(3);
  240. n_idx = 0;
  241. n = 0;
  242. }
  243. }
  244. switch (n_idx) {
  245. case 0:
  246. /* 0 leftover bits, no pading to add. */
  247. break;
  248. case 1:
  249. /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
  250. * by 2 padding characters.
  251. */
  252. n <<= 4;
  253. ENCODE_N(2);
  254. ENCODE_N(3);
  255. ENCODE_PAD();
  256. ENCODE_PAD();
  257. break;
  258. case 2:
  259. /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
  260. * by 1 padding character.
  261. */
  262. n <<= 2;
  263. ENCODE_N(1);
  264. ENCODE_N(2);
  265. ENCODE_N(3);
  266. ENCODE_PAD();
  267. break;
  268. // LCOV_EXCL_START -- we can't reach this point, because we enforce
  269. // 0 <= ncov_idx < 3 in the loop above.
  270. default:
  271. /* Something went catastrophically wrong. */
  272. tor_fragile_assert();
  273. return -1;
  274. // LCOV_EXCL_STOP
  275. }
  276. #undef ENCODE_N
  277. #undef ENCODE_PAD
  278. #undef ENCODE_CHAR
  279. /* Multiline output always includes at least one newline. */
  280. if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
  281. *d++ = '\n';
  282. tor_assert(d - dest == (ptrdiff_t)enclen);
  283. *d++ = '\0'; /* NUL terminate the output. */
  284. return (int) enclen;
  285. }
  286. /** As base64_encode, but do not add any internal spaces, and remove external
  287. * padding from the output stream.
  288. * dest must be at least base64_encode_size(srclen, 0), including space for
  289. * the removed external padding. */
  290. int
  291. base64_encode_nopad(char *dest, size_t destlen,
  292. const uint8_t *src, size_t srclen)
  293. {
  294. int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
  295. if (n <= 0)
  296. return n;
  297. tor_assert((size_t)n < destlen && dest[n] == 0);
  298. char *in, *out;
  299. in = out = dest;
  300. while (*in) {
  301. if (*in == '=' || *in == '\n') {
  302. ++in;
  303. } else {
  304. *out++ = *in++;
  305. }
  306. }
  307. *out = 0;
  308. tor_assert(out - dest <= INT_MAX);
  309. return (int)(out - dest);
  310. }
  311. #undef BASE64_OPENSSL_LINELEN
  312. /** @{ */
  313. /** Special values used for the base64_decode_table */
  314. #define X 255
  315. #define SP 64
  316. #define PAD 65
  317. /** @} */
  318. /** Internal table mapping byte values to what they represent in base64.
  319. * Numbers 0..63 are 6-bit integers. SPs are spaces, and should be
  320. * skipped. Xs are invalid and must not appear in base64. PAD indicates
  321. * end-of-string. */
  322. static const uint8_t base64_decode_table[256] = {
  323. X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
  324. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  325. SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
  326. 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
  327. X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  328. 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
  329. X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  330. 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
  331. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  332. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  333. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  334. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  335. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  336. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  337. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  338. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  339. };
  340. /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
  341. * the result into <b>dest</b>, if it will fit within <b>destlen</b>
  342. * bytes. Return the number of bytes written on success; -1 if
  343. * destlen is too short, or other failure.
  344. *
  345. * NOTE 1: destlen is checked conservatively, as though srclen contained no
  346. * spaces or padding.
  347. *
  348. * NOTE 2: This implementation does not check for the correct number of
  349. * padding "=" characters at the end of the string, and does not check
  350. * for internal padding characters.
  351. */
  352. int
  353. base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  354. {
  355. const char *eos = src+srclen;
  356. uint32_t n=0;
  357. int n_idx=0;
  358. size_t di = 0;
  359. if (destlen > INT_MAX)
  360. return -1;
  361. /* Make sure we leave no uninitialized data in the destination buffer. */
  362. memset(dest, 0, destlen);
  363. /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
  364. * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
  365. * 24 bits, batch them into 3 bytes and flush those bytes to dest.
  366. */
  367. for ( ; src < eos; ++src) {
  368. unsigned char c = (unsigned char) *src;
  369. uint8_t v = base64_decode_table[c];
  370. switch (v) {
  371. case X:
  372. /* This character isn't allowed in base64. */
  373. return -1;
  374. case SP:
  375. /* This character is whitespace, and has no effect. */
  376. continue;
  377. case PAD:
  378. /* We've hit an = character: the data is over. */
  379. goto end_of_loop;
  380. default:
  381. /* We have an actual 6-bit value. Append it to the bits in n. */
  382. n = (n<<6) | v;
  383. if ((++n_idx) == 4) {
  384. /* We've accumulated 24 bits in n. Flush them. */
  385. if (destlen < 3 || di > destlen - 3)
  386. return -1;
  387. dest[di++] = (n>>16);
  388. dest[di++] = (n>>8) & 0xff;
  389. dest[di++] = (n) & 0xff;
  390. n_idx = 0;
  391. n = 0;
  392. }
  393. }
  394. }
  395. end_of_loop:
  396. /* If we have leftover bits, we need to cope. */
  397. switch (n_idx) {
  398. case 0:
  399. default:
  400. /* No leftover bits. We win. */
  401. break;
  402. case 1:
  403. /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
  404. return -1;
  405. case 2:
  406. /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
  407. if (destlen < 1 || di > destlen - 1)
  408. return -1;
  409. dest[di++] = n >> 4;
  410. break;
  411. case 3:
  412. /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
  413. if (destlen < 2 || di > destlen - 2)
  414. return -1;
  415. dest[di++] = n >> 10;
  416. dest[di++] = n >> 2;
  417. }
  418. tor_assert(di <= destlen);
  419. return (int)di;
  420. }
  421. #undef X
  422. #undef SP
  423. #undef PAD
  424. /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
  425. * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
  426. * <b>dest</b>.
  427. */
  428. void
  429. base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
  430. {
  431. const char *end;
  432. char *cp;
  433. tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
  434. tor_assert(destlen >= BASE16_BUFSIZE(srclen));
  435. tor_assert(destlen < SIZE_T_CEILING);
  436. /* Make sure we leave no uninitialized data in the destination buffer. */
  437. memset(dest, 0, destlen);
  438. cp = dest;
  439. end = src+srclen;
  440. while (src<end) {
  441. *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
  442. *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
  443. ++src;
  444. }
  445. *cp = '\0';
  446. }
  447. /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
  448. * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
  449. * Return the number of bytes decoded on success, -1 on failure. If
  450. * <b>destlen</b> is greater than INT_MAX or less than half of
  451. * <b>srclen</b>, -1 is returned. */
  452. int
  453. base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  454. {
  455. const char *end;
  456. char *dest_orig = dest;
  457. int v1,v2;
  458. if ((srclen % 2) != 0)
  459. return -1;
  460. if (destlen < srclen/2 || destlen > INT_MAX)
  461. return -1;
  462. /* Make sure we leave no uninitialized data in the destination buffer. */
  463. memset(dest, 0, destlen);
  464. end = src+srclen;
  465. while (src<end) {
  466. v1 = hex_decode_digit(*src);
  467. v2 = hex_decode_digit(*(src+1));
  468. if (v1<0||v2<0)
  469. return -1;
  470. *(uint8_t*)dest = (v1<<4)|v2;
  471. ++dest;
  472. src+=2;
  473. }
  474. tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);
  475. return (int) (dest-dest_orig);
  476. }