util_format.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. /* Copyright (c) 2001, Matej Pfajfar.
  2. * Copyright (c) 2001-2004, Roger Dingledine.
  3. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  4. * Copyright (c) 2007-2015, The Tor Project, Inc. */
  5. /* See LICENSE for licensing information */
  6. #include "orconfig.h"
  7. #include "torlog.h"
  8. #include "util.h"
  9. #include "util_format.h"
  10. #include "torint.h"
  11. #include <stddef.h>
  12. #include <string.h>
  13. #include <stdlib.h>
  14. /** Implements base32 encoding as in RFC 4648. Limitation: Requires
  15. * that srclen*8 is a multiple of 5.
  16. */
  17. void
  18. base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
  19. {
  20. unsigned int i, v, u;
  21. size_t nbits = srclen * 8, bit;
  22. tor_assert(srclen < SIZE_T_CEILING/8);
  23. tor_assert((nbits%5) == 0); /* We need an even multiple of 5 bits. */
  24. tor_assert((nbits/5)+1 <= destlen); /* We need enough space. */
  25. tor_assert(destlen < SIZE_T_CEILING);
  26. for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
  27. /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
  28. v = ((uint8_t)src[bit/8]) << 8;
  29. if (bit+5<nbits) v += (uint8_t)src[(bit/8)+1];
  30. /* set u to the 5-bit value at the bit'th bit of src. */
  31. u = (v >> (11-(bit%8))) & 0x1F;
  32. dest[i] = BASE32_CHARS[u];
  33. }
  34. dest[i] = '\0';
  35. }
  36. /** Implements base32 decoding as in RFC 4648. Limitation: Requires
  37. * that srclen*5 is a multiple of 8. Returns 0 if successful, -1 otherwise.
  38. */
  39. int
  40. base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  41. {
  42. /* XXXX we might want to rewrite this along the lines of base64_decode, if
  43. * it ever shows up in the profile. */
  44. unsigned int i;
  45. size_t nbits, j, bit;
  46. char *tmp;
  47. nbits = srclen * 5;
  48. tor_assert(srclen < SIZE_T_CEILING / 5);
  49. tor_assert((nbits%8) == 0); /* We need an even multiple of 8 bits. */
  50. tor_assert((nbits/8) <= destlen); /* We need enough space. */
  51. tor_assert(destlen < SIZE_T_CEILING);
  52. memset(dest, 0, destlen);
  53. /* Convert base32 encoded chars to the 5-bit values that they represent. */
  54. tmp = tor_malloc_zero(srclen);
  55. for (j = 0; j < srclen; ++j) {
  56. if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
  57. else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
  58. else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
  59. else {
  60. log_warn(LD_BUG, "illegal character in base32 encoded string");
  61. tor_free(tmp);
  62. return -1;
  63. }
  64. }
  65. /* Assemble result byte-wise by applying five possible cases. */
  66. for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
  67. switch (bit % 40) {
  68. case 0:
  69. dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
  70. (((uint8_t)tmp[(bit/5)+1]) >> 2);
  71. break;
  72. case 8:
  73. dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
  74. (((uint8_t)tmp[(bit/5)+1]) << 1) +
  75. (((uint8_t)tmp[(bit/5)+2]) >> 4);
  76. break;
  77. case 16:
  78. dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
  79. (((uint8_t)tmp[(bit/5)+1]) >> 1);
  80. break;
  81. case 24:
  82. dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
  83. (((uint8_t)tmp[(bit/5)+1]) << 2) +
  84. (((uint8_t)tmp[(bit/5)+2]) >> 3);
  85. break;
  86. case 32:
  87. dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
  88. ((uint8_t)tmp[(bit/5)+1]);
  89. break;
  90. }
  91. }
  92. memset(tmp, 0, srclen); /* on the heap, this should be safe */
  93. tor_free(tmp);
  94. tmp = NULL;
  95. return 0;
  96. }
  97. #define BASE64_OPENSSL_LINELEN 64
  98. /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
  99. * bytes.
  100. *
  101. * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
  102. * of the encoded output as multiline output (64 character, `\n' terminated
  103. * lines).
  104. */
  105. size_t
  106. base64_encode_size(size_t srclen, int flags)
  107. {
  108. size_t enclen;
  109. tor_assert(srclen < INT_MAX);
  110. if (srclen == 0)
  111. return 0;
  112. enclen = ((srclen - 1) / 3) * 4 + 4;
  113. if (flags & BASE64_ENCODE_MULTILINE) {
  114. size_t remainder = enclen % BASE64_OPENSSL_LINELEN;
  115. enclen += enclen / BASE64_OPENSSL_LINELEN;
  116. if (remainder)
  117. enclen++;
  118. }
  119. tor_assert(enclen < INT_MAX && enclen > srclen);
  120. return enclen;
  121. }
  122. /** Internal table mapping 6 bit values to the Base64 alphabet. */
  123. static const char base64_encode_table[64] = {
  124. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  125. 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  126. 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  127. 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
  128. 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  129. 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
  130. 'w', 'x', 'y', 'z', '0', '1', '2', '3',
  131. '4', '5', '6', '7', '8', '9', '+', '/'
  132. };
  133. /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
  134. * the result into <b>dest</b>, if it will fit within <b>destlen</b>
  135. * bytes. Return the number of bytes written on success; -1 if
  136. * destlen is too short, or other failure.
  137. *
  138. * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
  139. * output in multiline format (64 character, `\n' terminated lines).
  140. */
  141. int
  142. base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
  143. int flags)
  144. {
  145. const unsigned char *usrc = (unsigned char *)src;
  146. const unsigned char *eous = usrc + srclen;
  147. char *d = dest;
  148. uint32_t n = 0;
  149. size_t linelen = 0;
  150. size_t enclen;
  151. int n_idx = 0;
  152. if (!src || !dest)
  153. return -1;
  154. /* Ensure that there is sufficient space, including the NUL. */
  155. enclen = base64_encode_size(srclen, flags);
  156. if (destlen < enclen + 1)
  157. return -1;
  158. if (destlen > SIZE_T_CEILING)
  159. return -1;
  160. if (enclen > INT_MAX)
  161. return -1;
  162. memset(dest, 0, enclen);
  163. /* XXX/Yawning: If this ends up being too slow, this can be sped up
  164. * by separating the multiline format case and the normal case, and
  165. * processing 48 bytes of input at a time when newlines are desired.
  166. */
  167. #define ENCODE_CHAR(ch) \
  168. STMT_BEGIN \
  169. *d++ = ch; \
  170. if (flags & BASE64_ENCODE_MULTILINE) { \
  171. if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
  172. linelen = 0; \
  173. *d++ = '\n'; \
  174. } \
  175. } \
  176. STMT_END
  177. #define ENCODE_N(idx) \
  178. ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
  179. #define ENCODE_PAD() ENCODE_CHAR('=')
  180. /* Iterate over all the bytes in src. Each one will add 8 bits to the
  181. * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
  182. * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
  183. */
  184. for ( ; usrc < eous; ++usrc) {
  185. n = (n << 8) | *usrc;
  186. if ((++n_idx) == 3) {
  187. ENCODE_N(0);
  188. ENCODE_N(1);
  189. ENCODE_N(2);
  190. ENCODE_N(3);
  191. n_idx = 0;
  192. n = 0;
  193. }
  194. }
  195. switch (n_idx) {
  196. case 0:
  197. /* 0 leftover bits, no pading to add. */
  198. break;
  199. case 1:
  200. /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
  201. * by 2 padding characters.
  202. */
  203. n <<= 4;
  204. ENCODE_N(2);
  205. ENCODE_N(3);
  206. ENCODE_PAD();
  207. ENCODE_PAD();
  208. break;
  209. case 2:
  210. /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
  211. * by 1 padding character.
  212. */
  213. n <<= 2;
  214. ENCODE_N(1);
  215. ENCODE_N(2);
  216. ENCODE_N(3);
  217. ENCODE_PAD();
  218. break;
  219. default:
  220. /* Something went catastrophically wrong. */
  221. tor_fragile_assert();
  222. return -1;
  223. }
  224. #undef ENCODE_N
  225. #undef ENCODE_PAD
  226. #undef ENCODE_CHAR
  227. /* Multiline output always includes at least one newline. */
  228. if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
  229. *d++ = '\n';
  230. tor_assert(d - dest == (ptrdiff_t)enclen);
  231. *d++ = '\0'; /* NUL terminate the output. */
  232. return (int) enclen;
  233. }
  234. /** As base64_encode, but do not add any internal spaces or external padding
  235. * to the output stream. */
  236. int
  237. base64_encode_nopad(char *dest, size_t destlen,
  238. const uint8_t *src, size_t srclen)
  239. {
  240. int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
  241. if (n <= 0)
  242. return n;
  243. tor_assert((size_t)n < destlen && dest[n] == 0);
  244. char *in, *out;
  245. in = out = dest;
  246. while (*in) {
  247. if (*in == '=' || *in == '\n') {
  248. ++in;
  249. } else {
  250. *out++ = *in++;
  251. }
  252. }
  253. *out = 0;
  254. tor_assert(out - dest <= INT_MAX);
  255. return (int)(out - dest);
  256. }
  257. /** As base64_decode, but do not require any padding on the input */
  258. int
  259. base64_decode_nopad(uint8_t *dest, size_t destlen,
  260. const char *src, size_t srclen)
  261. {
  262. if (srclen > SIZE_T_CEILING - 4)
  263. return -1;
  264. char *buf = tor_malloc(srclen + 4);
  265. memcpy(buf, src, srclen+1);
  266. size_t buflen;
  267. switch (srclen % 4)
  268. {
  269. case 0:
  270. default:
  271. buflen = srclen;
  272. break;
  273. case 1:
  274. tor_free(buf);
  275. return -1;
  276. case 2:
  277. memcpy(buf+srclen, "==", 3);
  278. buflen = srclen + 2;
  279. break;
  280. case 3:
  281. memcpy(buf+srclen, "=", 2);
  282. buflen = srclen + 1;
  283. break;
  284. }
  285. int n = base64_decode((char*)dest, destlen, buf, buflen);
  286. tor_free(buf);
  287. return n;
  288. }
  289. #undef BASE64_OPENSSL_LINELEN
  290. /** @{ */
  291. /** Special values used for the base64_decode_table */
  292. #define X 255
  293. #define SP 64
  294. #define PAD 65
  295. /** @} */
  296. /** Internal table mapping byte values to what they represent in base64.
  297. * Numbers 0..63 are 6-bit integers. SPs are spaces, and should be
  298. * skipped. Xs are invalid and must not appear in base64. PAD indicates
  299. * end-of-string. */
  300. static const uint8_t base64_decode_table[256] = {
  301. X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
  302. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  303. SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
  304. 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
  305. X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  306. 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
  307. X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  308. 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
  309. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  310. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  311. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  312. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  313. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  314. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  315. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  316. X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  317. };
  318. /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
  319. * the result into <b>dest</b>, if it will fit within <b>destlen</b>
  320. * bytes. Return the number of bytes written on success; -1 if
  321. * destlen is too short, or other failure.
  322. *
  323. * NOTE 1: destlen is checked conservatively, as though srclen contained no
  324. * spaces or padding.
  325. *
  326. * NOTE 2: This implementation does not check for the correct number of
  327. * padding "=" characters at the end of the string, and does not check
  328. * for internal padding characters.
  329. */
  330. int
  331. base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  332. {
  333. const char *eos = src+srclen;
  334. uint32_t n=0;
  335. int n_idx=0;
  336. char *dest_orig = dest;
  337. /* Max number of bits == srclen*6.
  338. * Number of bytes required to hold all bits == (srclen*6)/8.
  339. * Yes, we want to round down: anything that hangs over the end of a
  340. * byte is padding. */
  341. if (destlen < (srclen*3)/4)
  342. return -1;
  343. if (destlen > SIZE_T_CEILING)
  344. return -1;
  345. memset(dest, 0, destlen);
  346. /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
  347. * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
  348. * 24 bits, batch them into 3 bytes and flush those bytes to dest.
  349. */
  350. for ( ; src < eos; ++src) {
  351. unsigned char c = (unsigned char) *src;
  352. uint8_t v = base64_decode_table[c];
  353. switch (v) {
  354. case X:
  355. /* This character isn't allowed in base64. */
  356. return -1;
  357. case SP:
  358. /* This character is whitespace, and has no effect. */
  359. continue;
  360. case PAD:
  361. /* We've hit an = character: the data is over. */
  362. goto end_of_loop;
  363. default:
  364. /* We have an actual 6-bit value. Append it to the bits in n. */
  365. n = (n<<6) | v;
  366. if ((++n_idx) == 4) {
  367. /* We've accumulated 24 bits in n. Flush them. */
  368. *dest++ = (n>>16);
  369. *dest++ = (n>>8) & 0xff;
  370. *dest++ = (n) & 0xff;
  371. n_idx = 0;
  372. n = 0;
  373. }
  374. }
  375. }
  376. end_of_loop:
  377. /* If we have leftover bits, we need to cope. */
  378. switch (n_idx) {
  379. case 0:
  380. default:
  381. /* No leftover bits. We win. */
  382. break;
  383. case 1:
  384. /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
  385. return -1;
  386. case 2:
  387. /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
  388. *dest++ = n >> 4;
  389. break;
  390. case 3:
  391. /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
  392. *dest++ = n >> 10;
  393. *dest++ = n >> 2;
  394. }
  395. tor_assert((dest-dest_orig) <= (ssize_t)destlen);
  396. tor_assert((dest-dest_orig) <= INT_MAX);
  397. return (int)(dest-dest_orig);
  398. }
  399. #undef X
  400. #undef SP
  401. #undef PAD
  402. /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
  403. * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
  404. * <b>dest</b>.
  405. */
  406. void
  407. base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
  408. {
  409. const char *end;
  410. char *cp;
  411. tor_assert(destlen >= srclen*2+1);
  412. tor_assert(destlen < SIZE_T_CEILING);
  413. cp = dest;
  414. end = src+srclen;
  415. while (src<end) {
  416. *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
  417. *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
  418. ++src;
  419. }
  420. *cp = '\0';
  421. }
  422. /** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
  423. static inline int
  424. hex_decode_digit_(char c)
  425. {
  426. switch (c) {
  427. case '0': return 0;
  428. case '1': return 1;
  429. case '2': return 2;
  430. case '3': return 3;
  431. case '4': return 4;
  432. case '5': return 5;
  433. case '6': return 6;
  434. case '7': return 7;
  435. case '8': return 8;
  436. case '9': return 9;
  437. case 'A': case 'a': return 10;
  438. case 'B': case 'b': return 11;
  439. case 'C': case 'c': return 12;
  440. case 'D': case 'd': return 13;
  441. case 'E': case 'e': return 14;
  442. case 'F': case 'f': return 15;
  443. default:
  444. return -1;
  445. }
  446. }
  447. /** Helper: given a hex digit, return its value, or -1 if it isn't hex. */
  448. int
  449. hex_decode_digit(char c)
  450. {
  451. return hex_decode_digit_(c);
  452. }
  453. /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode it
  454. * and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
  455. * Return 0 on success, -1 on failure. */
  456. int
  457. base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  458. {
  459. const char *end;
  460. int v1,v2;
  461. if ((srclen % 2) != 0)
  462. return -1;
  463. if (destlen < srclen/2 || destlen > SIZE_T_CEILING)
  464. return -1;
  465. memset(dest, 0, destlen);
  466. end = src+srclen;
  467. while (src<end) {
  468. v1 = hex_decode_digit_(*src);
  469. v2 = hex_decode_digit_(*(src+1));
  470. if (v1<0||v2<0)
  471. return -1;
  472. *(uint8_t*)dest = (v1<<4)|v2;
  473. ++dest;
  474. src+=2;
  475. }
  476. return 0;
  477. }