compress.c 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /* Copyright (c) 2004, Roger Dingledine.
  2. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  3. * Copyright (c) 2007-2017, The Tor Project, Inc. */
  4. /* See LICENSE for licensing information */
  5. /**
  6. * \file compress.c
  7. * \brief Common compression API.
  8. **/
  9. #include "orconfig.h"
  10. #include <stdlib.h>
  11. #include <stdio.h>
  12. #include <assert.h>
  13. #include <string.h>
  14. #include "torint.h"
  15. #ifdef HAVE_NETINET_IN_H
  16. #include <netinet/in.h>
  17. #endif
  18. #include "util.h"
  19. #include "torlog.h"
  20. #include "compress.h"
  21. #include "compress_lzma.h"
  22. #include "compress_zlib.h"
  /** @{ */
  /* Thresholds for the compression-bomb heuristic.
   *
   * Outputs smaller than CHECK_FOR_COMPRESSION_BOMB_AFTER bytes are never
   * treated as bombs. At or above that size, the uncompression factor
   * (uncompressed size : compressed size) must not exceed
   * MAX_UNCOMPRESSION_FACTOR.
   *
   * Choosing MAX_UNCOMPRESSION_FACTOR is a trade-off. A small value limits
   * the attack multiplier; a large value ensures that no legitimate
   * document -- including ones we might invent in the future -- ever
   * compresses by more than that factor. That leaves a reasonably wide
   * range of workable values: IMO anything over 8 is probably safe, and
   * anything under 50 is probably sufficient.
   */
  #define MAX_UNCOMPRESSION_FACTOR 25
  #define CHECK_FOR_COMPRESSION_BOMB_AFTER (64 * 1024)
  /** @} */
  40. /** Return true if uncompressing an input of size <b>in_size</b> to an input of
  41. * size at least <b>size_out</b> looks like a compression bomb. */
  42. int
  43. tor_compress_is_compression_bomb(size_t size_in, size_t size_out)
  44. {
  45. if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER)
  46. return 0;
  47. return (size_out / size_in > MAX_UNCOMPRESSION_FACTOR);
  48. }
  49. /** Given <b>level</b> return the memory level. The memory level is needed for
  50. * the various compression backends used in Tor.
  51. */
  52. int
  53. tor_compress_memory_level(compression_level_t level)
  54. {
  55. switch (level) {
  56. default:
  57. case HIGH_COMPRESSION: return 8;
  58. case MEDIUM_COMPRESSION: return 7;
  59. case LOW_COMPRESSION: return 6;
  60. }
  61. }
  62. /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly
  63. * allocated buffer, using the method described in <b>method</b>. Store the
  64. * compressed string in *<b>out</b>, and its length in *<b>out_len</b>.
  65. * Return 0 on success, -1 on failure.
  66. */
  67. int
  68. tor_compress(char **out, size_t *out_len,
  69. const char *in, size_t in_len,
  70. compress_method_t method)
  71. {
  72. if (method == GZIP_METHOD || method == ZLIB_METHOD)
  73. return tor_zlib_compress(out, out_len, in, in_len, method);
  74. if (method == LZMA_METHOD)
  75. return tor_lzma_compress(out, out_len, in, in_len, method);
  76. return -1;
  77. }
  78. /** Given zero or more zlib-compressed or gzip-compressed strings of
  79. * total length
  80. * <b>in_len</b> bytes at <b>in</b>, uncompress them into a newly allocated
  81. * buffer, using the method described in <b>method</b>. Store the uncompressed
  82. * string in *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on
  83. * success, -1 on failure.
  84. *
  85. * If <b>complete_only</b> is true, we consider a truncated input as a
  86. * failure; otherwise we decompress as much as we can. Warn about truncated
  87. * or corrupt inputs at <b>protocol_warn_level</b>.
  88. */
  89. int
  90. tor_uncompress(char **out, size_t *out_len,
  91. const char *in, size_t in_len,
  92. compress_method_t method,
  93. int complete_only,
  94. int protocol_warn_level)
  95. {
  96. if (method == GZIP_METHOD || method == ZLIB_METHOD)
  97. return tor_zlib_uncompress(out, out_len, in, in_len,
  98. method,
  99. complete_only,
  100. protocol_warn_level);
  101. if (method == LZMA_METHOD)
  102. return tor_lzma_uncompress(out, out_len, in, in_len,
  103. method,
  104. complete_only,
  105. protocol_warn_level);
  106. return -1;
  107. }
  108. /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely
  109. * to be compressed or not. If it is, return the likeliest compression method.
  110. * Otherwise, return UNKNOWN_METHOD.
  111. */
  112. compress_method_t
  113. detect_compression_method(const char *in, size_t in_len)
  114. {
  115. if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) {
  116. return GZIP_METHOD;
  117. } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
  118. (ntohs(get_uint16(in)) % 31) == 0) {
  119. return ZLIB_METHOD;
  120. } else if (in_len > 3 &&
  121. fast_memeq(in, "\x5d\x00\x00\x00", 4)) {
  122. return LZMA_METHOD;
  123. } else {
  124. return UNKNOWN_METHOD;
  125. }
  126. }
  127. /** Internal state for an incremental compression/decompression. The body of
  128. * this struct is not exposed. */
  129. struct tor_compress_state_t {
  130. compress_method_t method; /**< The compression method. */
  131. union {
  132. tor_zlib_compress_state_t *zlib_state;
  133. tor_lzma_compress_state_t *lzma_state;
  134. } u; /**< Compression backend state. */
  135. };
  136. /** Construct and return a tor_compress_state_t object using <b>method</b>. If
  137. * <b>compress</b>, it's for compression; otherwise it's for decompression. */
  138. tor_compress_state_t *
  139. tor_compress_new(int compress, compress_method_t method,
  140. compression_level_t compression_level)
  141. {
  142. tor_compress_state_t *state;
  143. state = tor_malloc_zero(sizeof(tor_compress_state_t));
  144. state->method = method;
  145. switch (method) {
  146. case GZIP_METHOD:
  147. case ZLIB_METHOD: {
  148. tor_zlib_compress_state_t *zlib_state =
  149. tor_zlib_compress_new(compress, method, compression_level);
  150. if (zlib_state == NULL)
  151. goto err;
  152. state->u.zlib_state = zlib_state;
  153. break;
  154. }
  155. case LZMA_METHOD: {
  156. tor_lzma_compress_state_t *lzma_state =
  157. tor_lzma_compress_new(compress, method, compression_level);
  158. if (lzma_state == NULL)
  159. goto err;
  160. state->u.lzma_state = lzma_state;
  161. break;
  162. }
  163. case NO_METHOD:
  164. case UNKNOWN_METHOD:
  165. goto err;
  166. }
  167. return state;
  168. err:
  169. tor_free(state);
  170. return NULL;
  171. }
  172. /** Compress/decompress some bytes using <b>state</b>. Read up to
  173. * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes
  174. * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true,
  175. * we've reached the end of the input.
  176. *
  177. * Return TOR_COMPRESS_DONE if we've finished the entire
  178. * compression/decompression.
  179. * Return TOR_COMPRESS_OK if we're processed everything from the input.
  180. * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>.
  181. * Return TOR_COMPRESS_ERROR if the stream is corrupt.
  182. */
  183. tor_compress_output_t
  184. tor_compress_process(tor_compress_state_t *state,
  185. char **out, size_t *out_len,
  186. const char **in, size_t *in_len,
  187. int finish)
  188. {
  189. tor_assert(state != NULL);
  190. switch (state->method) {
  191. case GZIP_METHOD:
  192. case ZLIB_METHOD:
  193. return tor_zlib_compress_process(state->u.zlib_state,
  194. out, out_len, in, in_len,
  195. finish);
  196. case LZMA_METHOD:
  197. return tor_lzma_compress_process(state->u.lzma_state,
  198. out, out_len, in, in_len,
  199. finish);
  200. case NO_METHOD:
  201. case UNKNOWN_METHOD:
  202. goto err;
  203. }
  204. err:
  205. return TOR_COMPRESS_ERROR;
  206. }
  207. /** Deallocate <b>state</b>. */
  208. void
  209. tor_compress_free(tor_compress_state_t *state)
  210. {
  211. if (state == NULL)
  212. return;
  213. switch (state->method) {
  214. case GZIP_METHOD:
  215. case ZLIB_METHOD:
  216. tor_zlib_compress_free(state->u.zlib_state);
  217. break;
  218. case LZMA_METHOD:
  219. tor_lzma_compress_free(state->u.lzma_state);
  220. break;
  221. case NO_METHOD:
  222. case UNKNOWN_METHOD:
  223. break;
  224. }
  225. tor_free(state);
  226. }
  227. /** Return the approximate number of bytes allocated for <b>state</b>. */
  228. size_t
  229. tor_compress_state_size(const tor_compress_state_t *state)
  230. {
  231. tor_assert(state != NULL);
  232. switch (state->method) {
  233. case GZIP_METHOD:
  234. case ZLIB_METHOD:
  235. return tor_zlib_compress_state_size(state->u.zlib_state);
  236. case LZMA_METHOD:
  237. return tor_lzma_compress_state_size(state->u.lzma_state);
  238. case NO_METHOD:
  239. case UNKNOWN_METHOD:
  240. goto err;
  241. }
  242. err:
  243. return 0;
  244. }