Browse Source

Siphash-2-4 is now our hash in nearly all cases.

I've made an exception for cases where I'm sure that users can't
influence the inputs.  This is likely to cause a slowdown somewhere,
but it's safer to siphash everything and *then* look for cases to
optimize.

This patch doesn't actually get us any _benefit_ from siphash yet,
since we don't really randomize the key at any point.
Nick Mathewson 10 years ago
parent
commit
0e97c8e23e

+ 31 - 7
src/common/address.c

@@ -874,6 +874,32 @@ tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src)
   memcpy(dest, src, sizeof(tor_addr_t));
 }
 
+/** Copy a tor_addr_t from <b>src</b> to <b>dest</b>, taking extra care to
+ * copy only the well-defined portions: <b>dest</b> is zeroed, then only the
+ * family and that family's address bytes are set. Used when hashing. */
+void
+tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src)
+{
+  tor_assert(src != dest);
+  tor_assert(src);
+  tor_assert(dest);
+  memset(dest, 0, sizeof(tor_addr_t));
+  dest->family = src->family;
+  switch (tor_addr_family(src))
+    {
+    case AF_INET:
+      dest->addr.in_addr.s_addr = src->addr.in_addr.s_addr;
+      break;
+    case AF_INET6:
+      memcpy(dest->addr.in6_addr.s6_addr, src->addr.in6_addr.s6_addr, 16);
+      break;
+    case AF_UNSPEC:
+      break;
+    default:
+      tor_fragile_assert();
+    }
+}
+
 /** Given two addresses <b>addr1</b> and <b>addr2</b>, return 0 if the two
  * addresses are equivalent under the mask mbits, less than 0 if addr1
  * precedes addr2, and greater than 0 otherwise.
@@ -995,19 +1021,17 @@ tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
   }
 }
 
-/** Return a hash code based on the address addr */
-unsigned int
+/** Return a hash code based on the address addr. DOCDOC extra */
+uint64_t
 tor_addr_hash(const tor_addr_t *addr)
 {
   switch (tor_addr_family(addr)) {
   case AF_INET:
-    return tor_addr_to_ipv4h(addr);
+    return siphash24g(&addr->addr.in_addr.s_addr, 4);
   case AF_UNSPEC:
     return 0x4e4d5342;
-  case AF_INET6: {
-    const uint32_t *u = tor_addr_to_in6_addr32(addr);
-    return u[0] + u[1] + u[2] + u[3];
-    }
+  case AF_INET6:
+    return siphash24g(&addr->addr.in6_addr.s6_addr, 16);
   default:
     tor_fragile_assert();
     return 0;

+ 2 - 1
src/common/address.h

@@ -167,7 +167,7 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
  * "exactly". */
 #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
 
-unsigned int tor_addr_hash(const tor_addr_t *addr);
+uint64_t tor_addr_hash(const tor_addr_t *addr);
 int tor_addr_is_v4(const tor_addr_t *addr);
 int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
                           const char *filename, int lineno);
@@ -192,6 +192,7 @@ const char * tor_addr_to_str(char *dest, const tor_addr_t *addr, size_t len,
                              int decorate);
 int tor_addr_parse(tor_addr_t *addr, const char *src);
 void tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src);
+void tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src);
 void tor_addr_from_ipv4n(tor_addr_t *dest, uint32_t v4addr);
 /** Set <b>dest</b> to the IPv4 address encoded in <b>v4addr</b> in host
  * order. */

+ 2 - 8
src/common/container.c

@@ -1004,7 +1004,7 @@ strmap_entries_eq(const strmap_entry_t *a, const strmap_entry_t *b)
 static INLINE unsigned int
 strmap_entry_hash(const strmap_entry_t *a)
 {
-  return ht_string_hash(a->key);
+  return (unsigned) siphash24g(a->key, strlen(a->key));
 }
 
 /** Helper: compare digestmap_entry_t objects by key value. */
@@ -1018,13 +1018,7 @@ digestmap_entries_eq(const digestmap_entry_t *a, const digestmap_entry_t *b)
 static INLINE unsigned int
 digestmap_entry_hash(const digestmap_entry_t *a)
 {
-#if SIZEOF_INT != 8
-  const uint32_t *p = (const uint32_t*)a->key;
-  return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-#else
-  const uint64_t *p = (const uint64_t*)a->key;
-  return p[0] ^ p[1];
-#endif
+  return (unsigned) siphash24g(a->key, DIGEST_LEN);
 }
 
 HT_PROTOTYPE(strmap_impl, strmap_entry_t, node, strmap_entry_hash,

+ 11 - 10
src/common/container.h

@@ -7,6 +7,7 @@
 #define TOR_CONTAINER_H
 
 #include "util.h"
+#include "siphash.h"
 
 /** A resizeable list of pointers, with associated helpful functionality.
  *
@@ -610,11 +611,11 @@ typedef struct {
 static INLINE void
 digestset_add(digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   bitarray_set(set->ba, BIT(d1));
   bitarray_set(set->ba, BIT(d2));
   bitarray_set(set->ba, BIT(d3));
@@ -626,11 +627,11 @@ digestset_add(digestset_t *set, const char *digest)
 static INLINE int
 digestset_contains(const digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   return bitarray_is_set(set->ba, BIT(d1)) &&
          bitarray_is_set(set->ba, BIT(d2)) &&
          bitarray_is_set(set->ba, BIT(d3)) &&

+ 1 - 0
src/ext/siphash.h

@@ -1,5 +1,6 @@
 #ifndef SIPHASH_H
 #define SIPHASH_H
+
 struct sipkey {
   uint64_t k0;
   uint64_t k1;

+ 1 - 6
src/or/channel.c

@@ -95,12 +95,7 @@ typedef struct channel_idmap_entry_s {
 static INLINE unsigned
 channel_idmap_hash(const channel_idmap_entry_t *ent)
 {
-  const unsigned *a = (const unsigned *)ent->digest;
-#if SIZEOF_INT == 4
-  return a[0] ^ a[1] ^ a[2] ^ a[3] ^ a[4];
-#elif SIZEOF_INT == 8
-  return a[0] ^ a[1];
-#endif
+  return (unsigned) siphash24g(ent->digest, DIGEST_LEN);
 }
 
 static INLINE int

+ 1 - 1
src/or/dns.c

@@ -239,7 +239,7 @@ cached_resolves_eq(cached_resolve_t *a, cached_resolve_t *b)
 static INLINE unsigned int
 cached_resolve_hash(cached_resolve_t *a)
 {
-  return ht_string_hash(a->address);
+  return (unsigned) siphash24g((const uint8_t*)a->address, strlen(a->address));
 }
 
 HT_PROTOTYPE(cache_map, cached_resolve_t, node, cached_resolve_hash,

+ 2 - 11
src/or/fp_pair.c

@@ -32,17 +32,8 @@ fp_pair_map_entries_eq(const fp_pair_map_entry_t *a,
 static INLINE unsigned int
 fp_pair_map_entry_hash(const fp_pair_map_entry_t *a)
 {
-  const uint32_t *p;
-  unsigned int hash;
-
-  p = (const uint32_t *)(a->key.first);
-  /* Hashes are 20 bytes long, so 5 times uint32_t */
-  hash = p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-  /* Now XOR in the second fingerprint */
-  p = (const uint32_t *)(a->key.second);
-  hash ^= p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-
-  return hash;
+  tor_assert(sizeof(a->key) == DIGEST_LEN*2);
+  return (unsigned) siphash24g(&a->key, DIGEST_LEN*2);
 }
 
 /*

+ 5 - 3
src/or/geoip.c

@@ -486,10 +486,12 @@ static HT_HEAD(clientmap, clientmap_entry_t) client_history =
 static INLINE unsigned
 clientmap_entry_hash(const clientmap_entry_t *a)
 {
-  unsigned h = tor_addr_hash(&a->addr);
+  unsigned h = (unsigned) tor_addr_hash(&a->addr);
+
   if (a->transport_name)
-    h += ht_string_hash(a->transport_name);
-  return ht_improve_hash(h);
+    h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name));
+
+  return h;
 }
 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
 static INLINE int

+ 1 - 6
src/or/microdesc.c

@@ -45,12 +45,7 @@ struct microdesc_cache_t {
 static INLINE unsigned int
 microdesc_hash_(microdesc_t *md)
 {
-  unsigned *d = (unsigned*)md->digest;
-#if SIZEOF_INT == 4
-  return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
-#else
-  return d[0] ^ d[1] ^ d[2] ^ d[3];
-#endif
+  return (unsigned) siphash24g(md->digest, sizeof(md->digest));
 }
 
 /** Helper: compares <b>a</b> and <b>b</b> for equality for hash-table purposes. */

+ 1 - 8
src/or/nodelist.c

@@ -43,14 +43,7 @@ typedef struct nodelist_t {
 static INLINE unsigned int
 node_id_hash(const node_t *node)
 {
-#if SIZEOF_INT == 4
-  const uint32_t *p = (const uint32_t*)node->identity;
-  return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-#elif SIZEOF_INT == 8
-  const uint64_t *p = (const uint32_t*)node->identity;
-  const uint32_t *p32 = (const uint32_t*)node->identity;
-  return p[0] ^ p[1] ^ p32[4];
-#endif
+  return (unsigned) siphash24g(node->identity, DIGEST_LEN);
 }
 
 static INLINE unsigned int

+ 17 - 13
src/or/policies.c

@@ -597,21 +597,25 @@ policy_eq(policy_map_ent_t *a, policy_map_ent_t *b)
 
 /** Return a hashcode for <b>ent</b> */
 static unsigned int
-policy_hash(policy_map_ent_t *ent)
+policy_hash(const policy_map_ent_t *ent)
 {
-  addr_policy_t *a = ent->policy;
-  unsigned int r;
-  if (a->is_private)
-    r = 0x1234abcd;
-  else
-    r = tor_addr_hash(&a->addr);
-  r += a->prt_min << 8;
-  r += a->prt_max << 16;
-  r += a->maskbits;
-  if (a->policy_type == ADDR_POLICY_REJECT)
-    r ^= 0xffffffff;
+  const addr_policy_t *a = ent->policy;
+  addr_policy_t aa;
+  memset(&aa, 0, sizeof(aa));
+
+  aa.prt_min = a->prt_min;
+  aa.prt_max = a->prt_max;
+  aa.maskbits = a->maskbits;
+  aa.policy_type = a->policy_type;
+  aa.is_private = a->is_private;
+
+  if (a->is_private) {
+    aa.is_private = 1;
+  } else {
+    tor_addr_copy_tight(&aa.addr, &a->addr);
+  }
 
-  return r;
+  return (unsigned) siphash24g(&aa, sizeof(aa));
 }
 
 HT_PROTOTYPE(policy_map, policy_map_ent_t, node, policy_hash,