Bläddra i källkod

Siphash-2-4 is now our hash in nearly all cases.

I've made an exception for cases where I'm sure that users can't
influence the inputs.  This is likely to cause a slowdown somewhere,
but it's safer to siphash everything and *then* look for cases to
optimize.

This patch doesn't actually get us any _benefit_ from siphash yet,
since we don't really randomize the key at any point.
Nick Mathewson 10 år sedan
förälder
incheckning
0e97c8e23e
12 ändrade filer med 75 tillägg och 74 borttagningar
  1. 31 7
      src/common/address.c
  2. 2 1
      src/common/address.h
  3. 2 8
      src/common/container.c
  4. 11 10
      src/common/container.h
  5. 1 0
      src/ext/siphash.h
  6. 1 6
      src/or/channel.c
  7. 1 1
      src/or/dns.c
  8. 2 11
      src/or/fp_pair.c
  9. 5 3
      src/or/geoip.c
  10. 1 6
      src/or/microdesc.c
  11. 1 8
      src/or/nodelist.c
  12. 17 13
      src/or/policies.c

+ 31 - 7
src/common/address.c

@@ -874,6 +874,32 @@ tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src)
   memcpy(dest, src, sizeof(tor_addr_t));
 }
 
+/** Copy a tor_addr_t from <b>src</b> to <b>dest</b>, taking extra care to
+ * copy only the well-defined portions: <b>dest</b> is zeroed first, then
+ * only the family and the address bytes for that family are copied. Used
+ * for computing hashes of addresses.
+ *
+ * <b>src</b> and <b>dest</b> are asserted to be non-NULL and to not be the
+ * same object.
+ */
+void
+tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src)
+{
+  tor_assert(src != dest);
+  tor_assert(src);
+  tor_assert(dest);
+  memset(dest, 0, sizeof(tor_addr_t)); /* bytes not copied below stay zero */
+  dest->family = src->family;
+  switch (tor_addr_family(src))
+    {
+    case AF_INET:
+      dest->addr.in_addr.s_addr = src->addr.in_addr.s_addr;
+      break;
+    case AF_INET6:
+      memcpy(dest->addr.in6_addr.s6_addr, src->addr.in6_addr.s6_addr, 16);
+    case AF_UNSPEC: /* AF_INET6 falls through to here; nothing to copy */
+      break;
+    default:
+      tor_fragile_assert();
+    }
+}
+
 /** Given two addresses <b>addr1</b> and <b>addr2</b>, return 0 if the two
  * addresses are equivalent under the mask mbits, less than 0 if addr1
  * precedes addr2, and greater than 0 otherwise.
@@ -995,19 +1021,17 @@ tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
   }
 }
 
-/** Return a hash code based on the address addr */
-unsigned int
+/** Return a hash code based on the address addr. DOCDOC extra */
+uint64_t
 tor_addr_hash(const tor_addr_t *addr)
 {
   switch (tor_addr_family(addr)) {
   case AF_INET:
-    return tor_addr_to_ipv4h(addr);
+    return siphash24g(&addr->addr.in_addr.s_addr, 4);
   case AF_UNSPEC:
     return 0x4e4d5342;
-  case AF_INET6: {
-    const uint32_t *u = tor_addr_to_in6_addr32(addr);
-    return u[0] + u[1] + u[2] + u[3];
-    }
+  case AF_INET6:
+    return siphash24g(&addr->addr.in6_addr.s6_addr, 16);
   default:
     tor_fragile_assert();
     return 0;

+ 2 - 1
src/common/address.h

@@ -167,7 +167,7 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
  * "exactly". */
 #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
 
-unsigned int tor_addr_hash(const tor_addr_t *addr);
+uint64_t tor_addr_hash(const tor_addr_t *addr);
 int tor_addr_is_v4(const tor_addr_t *addr);
 int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
                           const char *filename, int lineno);
@@ -192,6 +192,7 @@ const char * tor_addr_to_str(char *dest, const tor_addr_t *addr, size_t len,
                              int decorate);
 int tor_addr_parse(tor_addr_t *addr, const char *src);
 void tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src);
+void tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src);
 void tor_addr_from_ipv4n(tor_addr_t *dest, uint32_t v4addr);
 /** Set <b>dest</b> to the IPv4 address encoded in <b>v4addr</b> in host
  * order. */

+ 2 - 8
src/common/container.c

@@ -1004,7 +1004,7 @@ strmap_entries_eq(const strmap_entry_t *a, const strmap_entry_t *b)
 static INLINE unsigned int
 strmap_entry_hash(const strmap_entry_t *a)
 {
-  return ht_string_hash(a->key);
+  return (unsigned) siphash24g(a->key, strlen(a->key));
 }
 
 /** Helper: compare digestmap_entry_t objects by key value. */
@@ -1018,13 +1018,7 @@ digestmap_entries_eq(const digestmap_entry_t *a, const digestmap_entry_t *b)
 static INLINE unsigned int
 digestmap_entry_hash(const digestmap_entry_t *a)
 {
-#if SIZEOF_INT != 8
-  const uint32_t *p = (const uint32_t*)a->key;
-  return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-#else
-  const uint64_t *p = (const uint64_t*)a->key;
-  return p[0] ^ p[1];
-#endif
+  return (unsigned) siphash24g(a->key, DIGEST_LEN);
 }
 
 HT_PROTOTYPE(strmap_impl, strmap_entry_t, node, strmap_entry_hash,

+ 11 - 10
src/common/container.h

@@ -7,6 +7,7 @@
 #define TOR_CONTAINER_H
 
 #include "util.h"
+#include "siphash.h"
 
 /** A resizeable list of pointers, with associated helpful functionality.
  *
@@ -610,11 +611,11 @@ typedef struct {
 static INLINE void
 digestset_add(digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   bitarray_set(set->ba, BIT(d1));
   bitarray_set(set->ba, BIT(d2));
   bitarray_set(set->ba, BIT(d3));
@@ -626,11 +627,11 @@ digestset_add(digestset_t *set, const char *digest)
 static INLINE int
 digestset_contains(const digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   return bitarray_is_set(set->ba, BIT(d1)) &&
          bitarray_is_set(set->ba, BIT(d2)) &&
          bitarray_is_set(set->ba, BIT(d3)) &&

+ 1 - 0
src/ext/siphash.h

@@ -1,5 +1,6 @@
 #ifndef SIPHASH_H
 #define SIPHASH_H
+
 struct sipkey {
   uint64_t k0;
   uint64_t k1;

+ 1 - 6
src/or/channel.c

@@ -95,12 +95,7 @@ typedef struct channel_idmap_entry_s {
 static INLINE unsigned
 channel_idmap_hash(const channel_idmap_entry_t *ent)
 {
-  const unsigned *a = (const unsigned *)ent->digest;
-#if SIZEOF_INT == 4
-  return a[0] ^ a[1] ^ a[2] ^ a[3] ^ a[4];
-#elif SIZEOF_INT == 8
-  return a[0] ^ a[1];
-#endif
+  return (unsigned) siphash24g(ent->digest, DIGEST_LEN);
 }
 
 static INLINE int

+ 1 - 1
src/or/dns.c

@@ -239,7 +239,7 @@ cached_resolves_eq(cached_resolve_t *a, cached_resolve_t *b)
 static INLINE unsigned int
 cached_resolve_hash(cached_resolve_t *a)
 {
-  return ht_string_hash(a->address);
+  return (unsigned) siphash24g((const uint8_t*)a->address, strlen(a->address));
 }
 
 HT_PROTOTYPE(cache_map, cached_resolve_t, node, cached_resolve_hash,

+ 2 - 11
src/or/fp_pair.c

@@ -32,17 +32,8 @@ fp_pair_map_entries_eq(const fp_pair_map_entry_t *a,
 static INLINE unsigned int
 fp_pair_map_entry_hash(const fp_pair_map_entry_t *a)
 {
-  const uint32_t *p;
-  unsigned int hash;
-
-  p = (const uint32_t *)(a->key.first);
-  /* Hashes are 20 bytes long, so 5 times uint32_t */
-  hash = p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-  /* Now XOR in the second fingerprint */
-  p = (const uint32_t *)(a->key.second);
-  hash ^= p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-
-  return hash;
+  tor_assert(sizeof(a->key) == DIGEST_LEN*2); /* the key is hashed below as
+                                               * DIGEST_LEN*2 raw bytes, so
+                                               * it must be exactly that
+                                               * size */
+  return (unsigned) siphash24g(&a->key, DIGEST_LEN*2);
 }
 
 /*

+ 5 - 3
src/or/geoip.c

@@ -486,10 +486,12 @@ static HT_HEAD(clientmap, clientmap_entry_t) client_history =
 static INLINE unsigned
 clientmap_entry_hash(const clientmap_entry_t *a)
 {
-  unsigned h = tor_addr_hash(&a->addr);
+  unsigned h = (unsigned) tor_addr_hash(&a->addr);
+
   if (a->transport_name)
-    h += ht_string_hash(a->transport_name);
-  return ht_improve_hash(h);
+    h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name));
+
+  return h;
 }
 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
 static INLINE int

+ 1 - 6
src/or/microdesc.c

@@ -45,12 +45,7 @@ struct microdesc_cache_t {
 static INLINE unsigned int
 microdesc_hash_(microdesc_t *md)
 {
-  unsigned *d = (unsigned*)md->digest;
-#if SIZEOF_INT == 4
-  return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
-#else
-  return d[0] ^ d[1] ^ d[2] ^ d[3];
-#endif
+  return (unsigned) siphash24g(md->digest, sizeof(md->digest));
 }
 
 /** Helper: compares <b>a</b> and <b>b</b> for equality for hash-table purposes. */

+ 1 - 8
src/or/nodelist.c

@@ -43,14 +43,7 @@ typedef struct nodelist_t {
 static INLINE unsigned int
 node_id_hash(const node_t *node)
 {
-#if SIZEOF_INT == 4
-  const uint32_t *p = (const uint32_t*)node->identity;
-  return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-#elif SIZEOF_INT == 8
-  const uint64_t *p = (const uint32_t*)node->identity;
-  const uint32_t *p32 = (const uint32_t*)node->identity;
-  return p[0] ^ p[1] ^ p32[4];
-#endif
+  return (unsigned) siphash24g(node->identity, DIGEST_LEN);
 }
 
 static INLINE unsigned int

+ 17 - 13
src/or/policies.c

@@ -597,21 +597,25 @@ policy_eq(policy_map_ent_t *a, policy_map_ent_t *b)
 
 /** Return a hashcode for <b>ent</b> */
 static unsigned int
-policy_hash(policy_map_ent_t *ent)
+policy_hash(const policy_map_ent_t *ent)
 {
-  addr_policy_t *a = ent->policy;
-  unsigned int r;
-  if (a->is_private)
-    r = 0x1234abcd;
-  else
-    r = tor_addr_hash(&a->addr);
-  r += a->prt_min << 8;
-  r += a->prt_max << 16;
-  r += a->maskbits;
-  if (a->policy_type == ADDR_POLICY_REJECT)
-    r ^= 0xffffffff;
+  const addr_policy_t *a = ent->policy;
+  addr_policy_t aa;
+  memset(&aa, 0, sizeof(aa)); /* start from all-zero bytes: all of aa is
+                               * hashed below, including bytes not set here */
+
+  aa.prt_min = a->prt_min;
+  aa.prt_max = a->prt_max;
+  aa.maskbits = a->maskbits;
+  aa.policy_type = a->policy_type;
+  aa.is_private = a->is_private;
+
+  if (a->is_private) { /* private policy: aa.addr stays zeroed */
+    aa.is_private = 1; /* NOTE(review): also copied above; confirm needed */
+  } else {
+    tor_addr_copy_tight(&aa.addr, &a->addr);
+  }
 
-  return r;
+  return (unsigned) siphash24g(&aa, sizeof(aa));
 }
 
 HT_PROTOTYPE(policy_map, policy_map_ent_t, node, policy_hash,