bloomfilt.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. /* Copyright (c) 2003-2004, Roger Dingledine
  2. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  3. * Copyright (c) 2007-2018, The Tor Project, Inc. */
  4. /* See LICENSE for licensing information */
  5. /**
  6. * \file bloomfilt.c
  7. * \brief Uses bitarray_t to implement a bloom filter.
  8. **/
  9. #include <stdlib.h>
  10. #include "lib/malloc/malloc.h"
  11. #include "lib/container/bloomfilt.h"
  12. #include "lib/intmath/bits.h"
  13. #include "lib/log/util_bug.h"
  14. #include "siphash.h"
  15. /** How many bloom-filter bits we set per address. This is twice the
  16. * BLOOMFILT_N_HASHES value, since we split the siphash output into two 32-bit
  17. * values. */
  18. #define N_BITS_PER_ITEM (BLOOMFILT_N_HASHES * 2)
  19. struct bloomfilt_t {
  20. /** siphash keys to make BLOOMFILT_N_HASHES independent hashes for each
  21. * items. */
  22. struct sipkey key[BLOOMFILT_N_HASHES];
  23. bloomfilt_hash_fn hashfn; /**< Function used to generate hashes */
  24. uint32_t mask; /**< One less than the number of bits in <b>ba</b>; always
  25. * one less than a power of two. */
  26. bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
  27. };
  28. #define BIT(set, n) ((n) & (set)->mask)
  29. /** Add the element <b>item</b> to <b>set</b>. */
  30. void
  31. bloomfilt_add(bloomfilt_t *set,
  32. const void *item)
  33. {
  34. int i;
  35. for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
  36. uint64_t h = set->hashfn(&set->key[i], item);
  37. uint32_t high_bits = (uint32_t)(h >> 32);
  38. uint32_t low_bits = (uint32_t)(h);
  39. bitarray_set(set->ba, BIT(set, high_bits));
  40. bitarray_set(set->ba, BIT(set, low_bits));
  41. }
  42. }
  43. /** If <b>item</b> is in <b>set</b>, return nonzero. Otherwise,
  44. * <em>probably</em> return zero. */
  45. int
  46. bloomfilt_probably_contains(const bloomfilt_t *set,
  47. const void *item)
  48. {
  49. int i, matches = 0;
  50. for (i = 0; i < BLOOMFILT_N_HASHES; ++i) {
  51. uint64_t h = set->hashfn(&set->key[i], item);
  52. uint32_t high_bits = (uint32_t)(h >> 32);
  53. uint32_t low_bits = (uint32_t)(h);
  54. // Note that !! is necessary here, since bitarray_is_set does not
  55. // necessarily return 1 on true.
  56. matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
  57. matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
  58. }
  59. return matches == N_BITS_PER_ITEM;
  60. }
  61. /** Return a newly allocated bloomfilt_t, optimized to hold a total of
  62. * <b>max_elements</b> elements with a reasonably low false positive weight.
  63. *
  64. * Uses the siphash-based function <b>hashfn</b> to compute hard-to-collide
  65. * functions of the items, and the key material <b>random_key</b> to
  66. * key the hash. There must be BLOOMFILT_KEY_LEN bytes in the supplied key.
  67. **/
  68. bloomfilt_t *
  69. bloomfilt_new(int max_elements,
  70. bloomfilt_hash_fn hashfn,
  71. const uint8_t *random_key)
  72. {
  73. /* The probability of false positives is about P=(1 - exp(-kn/m))^k, where k
  74. * is the number of hash functions per entry, m is the bits in the array,
  75. * and n is the number of elements inserted. For us, k==4, n<=max_elements,
  76. * and m==n_bits= approximately max_elements*32. This gives
  77. * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
  78. *
  79. * It would be more optimal in space vs false positives to get this false
  80. * positive rate by going for k==13, and m==18.5n, but we also want to
  81. * conserve CPU, and k==13 is pretty big.
  82. */
  83. int n_bits = 1u << (tor_log2(max_elements)+5);
  84. bloomfilt_t *r = tor_malloc(sizeof(bloomfilt_t));
  85. r->mask = n_bits - 1;
  86. r->ba = bitarray_init_zero(n_bits);
  87. tor_assert(sizeof(r->key) == BLOOMFILT_KEY_LEN);
  88. memcpy(r->key, random_key, sizeof(r->key));
  89. r->hashfn = hashfn;
  90. return r;
  91. }
  92. /** Free all storage held in <b>set</b>. */
  93. void
  94. bloomfilt_free_(bloomfilt_t *set)
  95. {
  96. if (!set)
  97. return;
  98. bitarray_free(set->ba);
  99. tor_free(set);
  100. }