Browse Source

Rewrite the core of parse_short_policy() to be faster.

The old implementation did some funky out-of-order lexing, and
tended to parse every port twice if the %d-%d pattern didn't match.

Closes ticket 28853.
Nick Mathewson 5 years ago
parent
commit
3dd1f064a7
2 changed files with 45 additions and 38 deletions
  1. 3 0
      changes/ticket28853
  2. 42 38
      src/core/or/policies.c

+ 3 - 0
changes/ticket28853

@@ -0,0 +1,3 @@
+  o Minor features (performance):
+    - Replace parse_short_policy() with a faster implementation, to improve
+      microdescriptor parsing time. Closes ticket 28853.

+ 42 - 38
src/core/or/policies.c

@@ -2720,7 +2720,7 @@ parse_short_policy(const char *summary)
   int is_accept;
   int n_entries;
   short_policy_entry_t entries[MAX_EXITPOLICY_SUMMARY_LEN]; /* overkill */
-  const char *next;
+  char *next;
 
   if (!strcmpstart(summary, "accept ")) {
     is_accept = 1;
@@ -2735,57 +2735,56 @@ parse_short_policy(const char *summary)
 
   n_entries = 0;
   for ( ; *summary; summary = next) {
-    const char *comma = strchr(summary, ',');
-    unsigned low, high;
-    char dummy;
-    char ent_buf[32];
-    size_t len;
-
-    next = comma ? comma+1 : strchr(summary, '\0');
-    len = comma ? (size_t)(comma - summary) : strlen(summary);
-
     if (n_entries == MAX_EXITPOLICY_SUMMARY_LEN) {
       log_fn(LOG_PROTOCOL_WARN, LD_DIR, "Impossibly long policy summary %s",
              escaped(orig_summary));
       return NULL;
     }
 
-    if (! TOR_ISDIGIT(*summary) || len > (sizeof(ent_buf)-1)) {
-      /* unrecognized entry format. skip it. */
-      continue;
-    }
-    if (len < 1) {
-      /* empty; skip it. */
-      /* XXX This happens to be unreachable, since if len==0, then *summary is
-       * ',' or '\0', and the TOR_ISDIGIT test above would have failed. */
-      continue;
+    unsigned low, high;
+    int ok;
+    low = (unsigned) tor_parse_ulong(summary, 10, 1, 65535, &ok, &next);
+    if (!ok) {
+      if (! TOR_ISDIGIT(*summary) || *summary == ',') {
+        /* Unrecognized format: skip it. */
+        goto skip_ent;
+      } else {
+        goto bad_ent;
+      }
     }
 
-    memcpy(ent_buf, summary, len);
-    ent_buf[len] = '\0';
+    switch (*next) {
+      case ',':
+        ++next;
+        /* fall through */
+      case '\0':
+        high = low;
+        break;
+      case '-':
+        high = (unsigned) tor_parse_ulong(next+1, 10, low, 65535, &ok, &next);
+        if (!ok)
+          goto bad_ent;
+
+        if (*next == ',')
+          ++next;
+        else if (*next != '\0')
+          goto bad_ent;
 
-    if (tor_sscanf(ent_buf, "%u-%u%c", &low, &high, &dummy) == 2) {
-      if (low<1 || low>65535 || high<1 || high>65535 || low>high) {
-        log_fn(LOG_PROTOCOL_WARN, LD_DIR,
-               "Found bad entry in policy summary %s", escaped(orig_summary));
-        return NULL;
-      }
-    } else if (tor_sscanf(ent_buf, "%u%c", &low, &dummy) == 1) {
-      if (low<1 || low>65535) {
-        log_fn(LOG_PROTOCOL_WARN, LD_DIR,
-               "Found bad entry in policy summary %s", escaped(orig_summary));
-        return NULL;
-      }
-      high = low;
-    } else {
-      log_fn(LOG_PROTOCOL_WARN, LD_DIR,"Found bad entry in policy summary %s",
-             escaped(orig_summary));
-      return NULL;
+        break;
+      default:
+        goto bad_ent;
     }
 
     entries[n_entries].min_port = low;
     entries[n_entries].max_port = high;
     n_entries++;
+
+    continue;
+  skip_ent:
+    next = strchr(next, ',');
+    if (!next)
+      break;
+    ++next;
   }
 
   if (n_entries == 0) {
@@ -2806,6 +2805,11 @@ parse_short_policy(const char *summary)
   result->n_entries = n_entries;
   memcpy(result->entries, entries, sizeof(short_policy_entry_t)*n_entries);
   return result;
+
+ bad_ent:
+  log_fn(LOG_PROTOCOL_WARN, LD_DIR,"Found bad entry in policy summary %s",
+         escaped(orig_summary));
+  return NULL;
 }
 
 /** Write <b>policy</b> back out into a string. */