Browse Source

Treat unparseable (micro)descriptors and extrainfos as undownloadable

One pain point in evolving the Tor design and implementing has been
adding code that makes clients reject directory documents that they
previously would have accepted, if those descriptors actually exist.
When this happened, the clients would get the document, reject it,
and then decide to try downloading it again, ad infinitum.  This
problem becomes particularly obnoxious with authorities, since if
some authorities accept a descriptor that others don't, the ones
that don't accept it would go crazy trying to re-fetch it over and
over. (See for example ticket #9286.)

This patch tries to solve this problem by tracking, if a descriptor
isn't parseable, what its digest was, and whether it is invalid
because of some flaw that applies to the portion containing the
digest.  (This excludes RSA signature problems: RSA signatures
aren't included in the digest.  This means that a directory
authority can still put another directory authority into a loop by
mentioning a descriptor, and then serving that descriptor with an
invalid RSA signatures.  But that would also make the misbehaving
directory authority get DoSed by the server it's attacking, so it's
not much of an issue.)

We already have a mechanism to mark something undownloadable with
downloadstatus_mark_impossible(); we use that here for
microdescriptors, extrainfos, and router descriptors.

Unit tests to follow in another patch.

Closes ticket #11243.
Nick Mathewson 9 years ago
parent
commit
a30594605e

+ 7 - 0
changes/ticket11243

@@ -0,0 +1,7 @@
+  o Major features (downloading):
+    - Upon receiving a server descriptor, microdescriptor, extrainfo
+      document, or other object that is unparseable, if its digest
+      matches what we expected, then mark it as not to be downloaded
+      again. Previously, when we got a descriptor we didn't like, we
+      would keep trying to download it over and over. Closes ticket
+      11243.

+ 2 - 2
src/or/dirserv.c

@@ -478,7 +478,7 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose,
   s = desc;
   s = desc;
   list = smartlist_new();
   list = smartlist_new();
   if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0,
   if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 0, 0,
-                                     annotation_buf)) {
+                                     annotation_buf, NULL)) {
     SMARTLIST_FOREACH(list, routerinfo_t *, ri, {
     SMARTLIST_FOREACH(list, routerinfo_t *, ri, {
         msg_out = NULL;
         msg_out = NULL;
         tor_assert(ri->purpose == purpose);
         tor_assert(ri->purpose == purpose);
@@ -494,7 +494,7 @@ dirserv_add_multiple_descriptors(const char *desc, uint8_t purpose,
 
 
   s = desc;
   s = desc;
   if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 1, 0,
   if (!router_parse_list_from_string(&s, NULL, list, SAVED_NOWHERE, 1, 0,
-                                     NULL)) {
+                                     NULL, NULL)) {
     SMARTLIST_FOREACH(list, extrainfo_t *, ei, {
     SMARTLIST_FOREACH(list, extrainfo_t *, ei, {
         msg_out = NULL;
         msg_out = NULL;
 
 

+ 2 - 2
src/or/dirvote.c

@@ -3288,8 +3288,8 @@ dirvote_create_microdescriptor(const routerinfo_t *ri, int consensus_method)
 
 
   {
   {
     smartlist_t *lst = microdescs_parse_from_string(output,
     smartlist_t *lst = microdescs_parse_from_string(output,
-                                                 output+strlen(output), 0,
-                                                    SAVED_NOWHERE);
+                                                    output+strlen(output), 0,
+                                                    SAVED_NOWHERE, NULL);
     if (smartlist_len(lst) != 1) {
     if (smartlist_len(lst) != 1) {
       log_warn(LD_DIR, "We generated a microdescriptor we couldn't parse.");
       log_warn(LD_DIR, "We generated a microdescriptor we couldn't parse.");
       SMARTLIST_FOREACH(lst, microdesc_t *, md, microdesc_free(md));
       SMARTLIST_FOREACH(lst, microdesc_t *, md, microdesc_free(md));

+ 39 - 2
src/or/microdesc.c

@@ -149,10 +149,11 @@ microdescs_add_to_cache(microdesc_cache_t *cache,
 {
 {
   smartlist_t *descriptors, *added;
   smartlist_t *descriptors, *added;
   const int allow_annotations = (where != SAVED_NOWHERE);
   const int allow_annotations = (where != SAVED_NOWHERE);
+  smartlist_t *invalid_digests = smartlist_new();
 
 
   descriptors = microdescs_parse_from_string(s, eos,
   descriptors = microdescs_parse_from_string(s, eos,
                                              allow_annotations,
                                              allow_annotations,
-                                             where);
+                                             where, invalid_digests);
   if (listed_at != (time_t)-1) {
   if (listed_at != (time_t)-1) {
     SMARTLIST_FOREACH(descriptors, microdesc_t *, md,
     SMARTLIST_FOREACH(descriptors, microdesc_t *, md,
                       md->last_listed = listed_at);
                       md->last_listed = listed_at);
@@ -161,8 +162,23 @@ microdescs_add_to_cache(microdesc_cache_t *cache,
     digestmap_t *requested; /* XXXX actually we should just use a
     digestmap_t *requested; /* XXXX actually we should just use a
                                digest256map */
                                digest256map */
     requested = digestmap_new();
     requested = digestmap_new();
+    /* Set requested[d] to 1 for every md we requested. */
     SMARTLIST_FOREACH(requested_digests256, const char *, cp,
     SMARTLIST_FOREACH(requested_digests256, const char *, cp,
       digestmap_set(requested, cp, (void*)1));
       digestmap_set(requested, cp, (void*)1));
+    /* Set requested[d] to 3 for every md we requested which we will never be
+     * able to parse.  Remove the ones we didn't request from invalid_digests.
+     */
+    SMARTLIST_FOREACH_BEGIN(invalid_digests, char *, cp) {
+      if (digestmap_get(requested, cp)) {
+        digestmap_set(requested, cp, (void*)3);
+      } else {
+        tor_free(cp);
+        SMARTLIST_DEL_CURRENT(invalid_digests, cp);
+      }
+    } SMARTLIST_FOREACH_END(cp);
+    /* Update requested[d] to 2 for the mds we asked for and got. Delete the
+     * ones we never requested from the 'descriptors' smartlist.
+     */
     SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
     SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
       if (digestmap_get(requested, md->digest)) {
       if (digestmap_get(requested, md->digest)) {
         digestmap_set(requested, md->digest, (void*)2);
         digestmap_set(requested, md->digest, (void*)2);
@@ -172,8 +188,11 @@ microdescs_add_to_cache(microdesc_cache_t *cache,
         SMARTLIST_DEL_CURRENT(descriptors, md);
         SMARTLIST_DEL_CURRENT(descriptors, md);
       }
       }
     } SMARTLIST_FOREACH_END(md);
     } SMARTLIST_FOREACH_END(md);
+    /* Remove the ones we got or the invalid ones from requested_digests256.
+     */
     SMARTLIST_FOREACH_BEGIN(requested_digests256, char *, cp) {
     SMARTLIST_FOREACH_BEGIN(requested_digests256, char *, cp) {
-      if (digestmap_get(requested, cp) == (void*)2) {
+      void *status = digestmap_get(requested, cp);
+      if (status == (void*)2 || status == (void*)3) {
         tor_free(cp);
         tor_free(cp);
         SMARTLIST_DEL_CURRENT(requested_digests256, cp);
         SMARTLIST_DEL_CURRENT(requested_digests256, cp);
       }
       }
@@ -181,6 +200,24 @@ microdescs_add_to_cache(microdesc_cache_t *cache,
     digestmap_free(requested, NULL);
     digestmap_free(requested, NULL);
   }
   }
 
 
+  /* For every requested microdescriptor that was unparseable, mark it
+   * as not to be retried. */
+  if (smartlist_len(invalid_digests)) {
+    networkstatus_t *ns =
+      networkstatus_get_latest_consensus_by_flavor(FLAV_MICRODESC);
+    if (ns) {
+      SMARTLIST_FOREACH_BEGIN(invalid_digests, char *, d) {
+        routerstatus_t *rs =
+          router_get_mutable_consensus_status_by_descriptor_digest(ns, d);
+        if (rs && tor_memeq(d, rs->descriptor_digest, DIGEST256_LEN)) {
+          download_status_mark_impossible(&rs->dl_status);
+        }
+      } SMARTLIST_FOREACH_END(d);
+    }
+  }
+  SMARTLIST_FOREACH(invalid_digests, uint8_t *, d, tor_free(d));
+  smartlist_free(invalid_digests);
+
   added = microdescs_add_list_to_cache(cache, descriptors, where, no_save);
   added = microdescs_add_list_to_cache(cache, descriptors, where, no_save);
   smartlist_free(descriptors);
   smartlist_free(descriptors);
   return added;
   return added;

+ 1 - 1
src/or/networkstatus.c

@@ -1123,7 +1123,7 @@ networkstatus_copy_old_consensus_info(networkstatus_t *new_c,
     rs_new->last_dir_503_at = rs_old->last_dir_503_at;
     rs_new->last_dir_503_at = rs_old->last_dir_503_at;
 
 
     if (tor_memeq(rs_old->descriptor_digest, rs_new->descriptor_digest,
     if (tor_memeq(rs_old->descriptor_digest, rs_new->descriptor_digest,
-                DIGEST_LEN)) {
+                  DIGEST_LEN)) { /* XXXX Change this to digest256_len */
       /* And the same descriptor too! */
       /* And the same descriptor too! */
       memcpy(&rs_new->dl_status, &rs_old->dl_status,sizeof(download_status_t));
       memcpy(&rs_new->dl_status, &rs_old->dl_status,sizeof(download_status_t));
     }
     }

+ 1 - 0
src/or/or.h

@@ -1958,6 +1958,7 @@ typedef struct download_status_t {
   uint8_t n_download_failures; /**< Number of failures trying to download the
   uint8_t n_download_failures; /**< Number of failures trying to download the
                                 * most recent descriptor. */
                                 * most recent descriptor. */
   download_schedule_bitfield_t schedule : 8;
   download_schedule_bitfield_t schedule : 8;
+
 } download_status_t;
 } download_status_t;
 
 
 /** If n_download_failures is this high, the download can never happen. */
 /** If n_download_failures is this high, the download can never happen. */

+ 3 - 3
src/or/router.c

@@ -917,7 +917,7 @@ init_keys(void)
     }
     }
     if (mydesc) {
     if (mydesc) {
       was_router_added_t added;
       was_router_added_t added;
-      ri = router_parse_entry_from_string(mydesc, NULL, 1, 0, NULL);
+      ri = router_parse_entry_from_string(mydesc, NULL, 1, 0, NULL, NULL);
       if (!ri) {
       if (!ri) {
         log_err(LD_GENERAL,"Generated a routerinfo we couldn't parse.");
         log_err(LD_GENERAL,"Generated a routerinfo we couldn't parse.");
         return -1;
         return -1;
@@ -2447,7 +2447,7 @@ router_dump_router_to_string(routerinfo_t *router,
     const char *cp;
     const char *cp;
     routerinfo_t *ri_tmp;
     routerinfo_t *ri_tmp;
     cp = s_dup = tor_strdup(output);
     cp = s_dup = tor_strdup(output);
-    ri_tmp = router_parse_entry_from_string(cp, NULL, 1, 0, NULL);
+    ri_tmp = router_parse_entry_from_string(cp, NULL, 1, 0, NULL, NULL);
     if (!ri_tmp) {
     if (!ri_tmp) {
       log_err(LD_BUG,
       log_err(LD_BUG,
               "We just generated a router descriptor we can't parse.");
               "We just generated a router descriptor we can't parse.");
@@ -2729,7 +2729,7 @@ extrainfo_dump_to_string(char **s_out, extrainfo_t *extrainfo,
   s = smartlist_join_strings(chunks, "", 0, NULL);
   s = smartlist_join_strings(chunks, "", 0, NULL);
 
 
   cp = s_dup = tor_strdup(s);
   cp = s_dup = tor_strdup(s);
-  ei_tmp = extrainfo_parse_entry_from_string(cp, NULL, 1, NULL);
+  ei_tmp = extrainfo_parse_entry_from_string(cp, NULL, 1, NULL, NULL);
   if (!ei_tmp) {
   if (!ei_tmp) {
     if (write_stats_to_extrainfo) {
     if (write_stats_to_extrainfo) {
       log_warn(LD_GENERAL, "We just generated an extra-info descriptor "
       log_warn(LD_GENERAL, "We just generated an extra-info descriptor "

+ 51 - 4
src/or/routerlist.c

@@ -3251,7 +3251,7 @@ routerlist_reparse_old(routerlist_t *rl, signed_descriptor_t *sd)
 
 
   ri = router_parse_entry_from_string(body,
   ri = router_parse_entry_from_string(body,
                          body+sd->signed_descriptor_len+sd->annotations_len,
                          body+sd->signed_descriptor_len+sd->annotations_len,
-                         0, 1, NULL);
+                         0, 1, NULL, NULL);
   if (!ri)
   if (!ri)
     return NULL;
     return NULL;
   memcpy(&ri->cache_info, sd, sizeof(signed_descriptor_t));
   memcpy(&ri->cache_info, sd, sizeof(signed_descriptor_t));
@@ -3807,7 +3807,8 @@ router_load_single_router(const char *s, uint8_t purpose, int cache,
                "@source controller\n"
                "@source controller\n"
                "@purpose %s\n", router_purpose_to_string(purpose));
                "@purpose %s\n", router_purpose_to_string(purpose));
 
 
-  if (!(ri = router_parse_entry_from_string(s, NULL, 1, 0, annotation_buf))) {
+  if (!(ri = router_parse_entry_from_string(s, NULL, 1, 0,
+                                            annotation_buf, NULL))) {
     log_warn(LD_DIR, "Error parsing router descriptor; dropping.");
     log_warn(LD_DIR, "Error parsing router descriptor; dropping.");
     *msg = "Couldn't parse router descriptor.";
     *msg = "Couldn't parse router descriptor.";
     return -1;
     return -1;
@@ -3871,9 +3872,11 @@ router_load_routers_from_string(const char *s, const char *eos,
   int from_cache = (saved_location != SAVED_NOWHERE);
   int from_cache = (saved_location != SAVED_NOWHERE);
   int allow_annotations = (saved_location != SAVED_NOWHERE);
   int allow_annotations = (saved_location != SAVED_NOWHERE);
   int any_changed = 0;
   int any_changed = 0;
+  smartlist_t *invalid_digests = smartlist_new();
 
 
   router_parse_list_from_string(&s, eos, routers, saved_location, 0,
   router_parse_list_from_string(&s, eos, routers, saved_location, 0,
-                                allow_annotations, prepend_annotations);
+                                allow_annotations, prepend_annotations,
+                                invalid_digests);
 
 
   routers_update_status_from_consensus_networkstatus(routers, !from_cache);
   routers_update_status_from_consensus_networkstatus(routers, !from_cache);
 
 
@@ -3920,6 +3923,27 @@ router_load_routers_from_string(const char *s, const char *eos,
     }
     }
   } SMARTLIST_FOREACH_END(ri);
   } SMARTLIST_FOREACH_END(ri);
 
 
+  SMARTLIST_FOREACH_BEGIN(invalid_digests, const uint8_t *, bad_digest) {
+    /* This digest is never going to be parseable. */
+    base16_encode(fp, sizeof(fp), (char*)bad_digest, DIGEST_LEN);
+    if (requested_fingerprints && descriptor_digests) {
+      if (! smartlist_contains_string(requested_fingerprints, fp)) {
+        /* But we didn't ask for it, so we should assume shennanegans. */
+        continue;
+      }
+      smartlist_string_remove(requested_fingerprints, fp);
+    }
+    download_status_t *dls;
+    dls = router_get_dl_status_by_descriptor_digest((char*)bad_digest);
+    if (dls) {
+      log_info(LD_GENERAL, "Marking router with descriptor %s as unparseable, "
+               "and therefore undownloadable", fp);
+      download_status_mark_impossible(dls);
+    }
+  } SMARTLIST_FOREACH_END(bad_digest);
+  SMARTLIST_FOREACH(invalid_digests, uint8_t *, d, tor_free(d));
+  smartlist_free(invalid_digests);
+
   routerlist_assert_ok(routerlist);
   routerlist_assert_ok(routerlist);
 
 
   if (any_changed)
   if (any_changed)
@@ -3943,9 +3967,10 @@ router_load_extrainfo_from_string(const char *s, const char *eos,
   smartlist_t *extrainfo_list = smartlist_new();
   smartlist_t *extrainfo_list = smartlist_new();
   const char *msg;
   const char *msg;
   int from_cache = (saved_location != SAVED_NOWHERE);
   int from_cache = (saved_location != SAVED_NOWHERE);
+  smartlist_t *invalid_digests = smartlist_new();
 
 
   router_parse_list_from_string(&s, eos, extrainfo_list, saved_location, 1, 0,
   router_parse_list_from_string(&s, eos, extrainfo_list, saved_location, 1, 0,
-                                NULL);
+                                NULL, invalid_digests);
 
 
   log_info(LD_DIR, "%d elements to add", smartlist_len(extrainfo_list));
   log_info(LD_DIR, "%d elements to add", smartlist_len(extrainfo_list));
 
 
@@ -3966,6 +3991,28 @@ router_load_extrainfo_from_string(const char *s, const char *eos,
       }
       }
   } SMARTLIST_FOREACH_END(ei);
   } SMARTLIST_FOREACH_END(ei);
 
 
+  SMARTLIST_FOREACH_BEGIN(invalid_digests, const uint8_t *, bad_digest) {
+    /* This digest is never going to be parseable. */
+    char fp[HEX_DIGEST_LEN+1];
+    base16_encode(fp, sizeof(fp), (char*)bad_digest, DIGEST_LEN);
+    if (requested_fingerprints) {
+      if (! smartlist_contains_string(requested_fingerprints, fp)) {
+        /* But we didn't ask for it, so we should assume shennanegans. */
+        continue;
+      }
+      smartlist_string_remove(requested_fingerprints, fp);
+    }
+    signed_descriptor_t *sd =
+      router_get_by_extrainfo_digest((char*)bad_digest);
+    if (sd) {
+      log_info(LD_GENERAL, "Marking extrainfo with descriptor %s as "
+               "unparseable, and therefore undownloadable", fp);
+      download_status_mark_impossible(&sd->ei_dl_status);
+    }
+  } SMARTLIST_FOREACH_END(bad_digest);
+  SMARTLIST_FOREACH(invalid_digests, uint8_t *, d, tor_free(d));
+  smartlist_free(invalid_digests);
+
   routerlist_assert_ok(routerlist);
   routerlist_assert_ok(routerlist);
   router_rebuild_store(0, &router_get_routerlist()->extrainfo_store);
   router_rebuild_store(0, &router_get_routerlist()->extrainfo_store);
 
 

+ 61 - 21
src/or/routerparse.c

@@ -911,7 +911,8 @@ find_start_of_next_router_or_extrainfo(const char **s_ptr,
  * descriptor in the signed_descriptor_body field of each routerinfo_t.  If it
  * descriptor in the signed_descriptor_body field of each routerinfo_t.  If it
  * isn't SAVED_NOWHERE, remember the offset of each descriptor.
  * isn't SAVED_NOWHERE, remember the offset of each descriptor.
  *
  *
- * Returns 0 on success and -1 on failure.
+ * Returns 0 on success and -1 on failure.  Adds a digest to
+ * <b>invalid_digests_out</b> for every entry that was unparseable or invalid.
  */
  */
 int
 int
 router_parse_list_from_string(const char **s, const char *eos,
 router_parse_list_from_string(const char **s, const char *eos,
@@ -919,7 +920,8 @@ router_parse_list_from_string(const char **s, const char *eos,
                               saved_location_t saved_location,
                               saved_location_t saved_location,
                               int want_extrainfo,
                               int want_extrainfo,
                               int allow_annotations,
                               int allow_annotations,
-                              const char *prepend_annotations)
+                              const char *prepend_annotations,
+                              smartlist_t *invalid_digests_out)
 {
 {
   routerinfo_t *router;
   routerinfo_t *router;
   extrainfo_t *extrainfo;
   extrainfo_t *extrainfo;
@@ -939,6 +941,9 @@ router_parse_list_from_string(const char **s, const char *eos,
   tor_assert(eos >= *s);
   tor_assert(eos >= *s);
 
 
   while (1) {
   while (1) {
+    char raw_digest[DIGEST_LEN];
+    int have_raw_digest = 0;
+    int dl_again = 0;
     if (find_start_of_next_router_or_extrainfo(s, eos, &have_extrainfo) < 0)
     if (find_start_of_next_router_or_extrainfo(s, eos, &have_extrainfo) < 0)
       break;
       break;
 
 
@@ -955,18 +960,20 @@ router_parse_list_from_string(const char **s, const char *eos,
 
 
     if (have_extrainfo && want_extrainfo) {
     if (have_extrainfo && want_extrainfo) {
       routerlist_t *rl = router_get_routerlist();
       routerlist_t *rl = router_get_routerlist();
+      have_raw_digest = router_get_extrainfo_hash(*s, end-*s, raw_digest) == 0;
       extrainfo = extrainfo_parse_entry_from_string(*s, end,
       extrainfo = extrainfo_parse_entry_from_string(*s, end,
                                        saved_location != SAVED_IN_CACHE,
                                        saved_location != SAVED_IN_CACHE,
-                                       rl->identity_map);
+                                       rl->identity_map, &dl_again);
       if (extrainfo) {
       if (extrainfo) {
         signed_desc = &extrainfo->cache_info;
         signed_desc = &extrainfo->cache_info;
         elt = extrainfo;
         elt = extrainfo;
       }
       }
     } else if (!have_extrainfo && !want_extrainfo) {
     } else if (!have_extrainfo && !want_extrainfo) {
+      have_raw_digest = router_get_router_hash(*s, end-*s, raw_digest) == 0;
       router = router_parse_entry_from_string(*s, end,
       router = router_parse_entry_from_string(*s, end,
                                               saved_location != SAVED_IN_CACHE,
                                               saved_location != SAVED_IN_CACHE,
                                               allow_annotations,
                                               allow_annotations,
-                                              prepend_annotations);
+                                              prepend_annotations, &dl_again);
       if (router) {
       if (router) {
         log_debug(LD_DIR, "Read router '%s', purpose '%s'",
         log_debug(LD_DIR, "Read router '%s', purpose '%s'",
                   router_describe(router),
                   router_describe(router),
@@ -975,6 +982,9 @@ router_parse_list_from_string(const char **s, const char *eos,
         elt = router;
         elt = router;
       }
       }
     }
     }
+    if (! elt && ! dl_again && have_raw_digest && invalid_digests_out) {
+      smartlist_add(invalid_digests_out, tor_memdup(raw_digest, DIGEST_LEN));
+    }
     if (!elt) {
     if (!elt) {
       *s = end;
       *s = end;
       continue;
       continue;
@@ -1068,11 +1078,17 @@ find_single_ipv6_orport(const smartlist_t *list,
  * around when caching the router.
  * around when caching the router.
  *
  *
  * Only one of allow_annotations and prepend_annotations may be set.
  * Only one of allow_annotations and prepend_annotations may be set.
+ *
+ * If <b>can_dl_again_out</b> is provided, set *<b>can_dl_again_out</b> to 1
+ * if it's okay to try to download a descriptor with this same digest again,
+ * and 0 if it isn't.  (It might not be okay to download it again if part of
+ * the part covered by the digest is invalid.)
  */
  */
 routerinfo_t *
 routerinfo_t *
 router_parse_entry_from_string(const char *s, const char *end,
 router_parse_entry_from_string(const char *s, const char *end,
                                int cache_copy, int allow_annotations,
                                int cache_copy, int allow_annotations,
-                               const char *prepend_annotations)
+                               const char *prepend_annotations,
+                               int *can_dl_again_out)
 {
 {
   routerinfo_t *router = NULL;
   routerinfo_t *router = NULL;
   char digest[128];
   char digest[128];
@@ -1083,6 +1099,7 @@ router_parse_entry_from_string(const char *s, const char *end,
   size_t prepend_len = prepend_annotations ? strlen(prepend_annotations) : 0;
   size_t prepend_len = prepend_annotations ? strlen(prepend_annotations) : 0;
   int ok = 1;
   int ok = 1;
   memarea_t *area = NULL;
   memarea_t *area = NULL;
+  int can_dl_again = 0;
 
 
   tor_assert(!allow_annotations || !prepend_annotations);
   tor_assert(!allow_annotations || !prepend_annotations);
 
 
@@ -1389,19 +1406,20 @@ router_parse_entry_from_string(const char *s, const char *end,
     verified_digests = digestmap_new();
     verified_digests = digestmap_new();
   digestmap_set(verified_digests, signed_digest, (void*)(uintptr_t)1);
   digestmap_set(verified_digests, signed_digest, (void*)(uintptr_t)1);
 #endif
 #endif
-  if (check_signature_token(digest, DIGEST_LEN, tok, router->identity_pkey, 0,
-                            "router descriptor") < 0)
-    goto err;
 
 
   if (!router->or_port) {
   if (!router->or_port) {
     log_warn(LD_DIR,"or_port unreadable or 0. Failing.");
     log_warn(LD_DIR,"or_port unreadable or 0. Failing.");
     goto err;
     goto err;
   }
   }
 
 
+  can_dl_again = 1;
+  if (check_signature_token(digest, DIGEST_LEN, tok, router->identity_pkey, 0,
+                            "router descriptor") < 0)
+    goto err;
+
   if (!router->platform) {
   if (!router->platform) {
     router->platform = tor_strdup("<unknown>");
     router->platform = tor_strdup("<unknown>");
   }
   }
-
   goto done;
   goto done;
 
 
  err:
  err:
@@ -1418,6 +1436,8 @@ router_parse_entry_from_string(const char *s, const char *end,
     DUMP_AREA(area, "routerinfo");
     DUMP_AREA(area, "routerinfo");
     memarea_drop_all(area);
     memarea_drop_all(area);
   }
   }
+  if (can_dl_again_out)
+    *can_dl_again_out = can_dl_again;
   return router;
   return router;
 }
 }
 
 
@@ -1426,10 +1446,16 @@ router_parse_entry_from_string(const char *s, const char *end,
  * <b>cache_copy</b> is true, make a copy of the extra-info document in the
  * <b>cache_copy</b> is true, make a copy of the extra-info document in the
  * cache_info fields of the result.  If <b>routermap</b> is provided, use it
  * cache_info fields of the result.  If <b>routermap</b> is provided, use it
  * as a map from router identity to routerinfo_t when looking up signing keys.
  * as a map from router identity to routerinfo_t when looking up signing keys.
+ *
+ * If <b>can_dl_again_out</b> is provided, set *<b>can_dl_again_out</b> to 1
+ * if it's okay to try to download an extrainfo with this same digest again,
+ * and 0 if it isn't.  (It might not be okay to download it again if part of
+ * the part covered by the digest is invalid.)
  */
  */
 extrainfo_t *
 extrainfo_t *
 extrainfo_parse_entry_from_string(const char *s, const char *end,
 extrainfo_parse_entry_from_string(const char *s, const char *end,
-                           int cache_copy, struct digest_ri_map_t *routermap)
+                            int cache_copy, struct digest_ri_map_t *routermap,
+                            int *can_dl_again_out)
 {
 {
   extrainfo_t *extrainfo = NULL;
   extrainfo_t *extrainfo = NULL;
   char digest[128];
   char digest[128];
@@ -1439,6 +1465,7 @@ extrainfo_parse_entry_from_string(const char *s, const char *end,
   routerinfo_t *router = NULL;
   routerinfo_t *router = NULL;
   memarea_t *area = NULL;
   memarea_t *area = NULL;
   const char *s_dup = s;
   const char *s_dup = s;
+  int can_dl_again = 0;
 
 
   if (!end) {
   if (!end) {
     end = s + strlen(s);
     end = s + strlen(s);
@@ -1498,6 +1525,8 @@ extrainfo_parse_entry_from_string(const char *s, const char *end,
     goto err;
     goto err;
   }
   }
 
 
+  can_dl_again = 1;
+
   if (routermap &&
   if (routermap &&
       (router = digestmap_get((digestmap_t*)routermap,
       (router = digestmap_get((digestmap_t*)routermap,
                               extrainfo->cache_info.identity_digest))) {
                               extrainfo->cache_info.identity_digest))) {
@@ -1540,6 +1569,8 @@ extrainfo_parse_entry_from_string(const char *s, const char *end,
     DUMP_AREA(area, "extrainfo");
     DUMP_AREA(area, "extrainfo");
     memarea_drop_all(area);
     memarea_drop_all(area);
   }
   }
+  if (can_dl_again_out)
+    *can_dl_again_out = can_dl_again;
   return extrainfo;
   return extrainfo;
 }
 }
 
 
@@ -4006,12 +4037,15 @@ find_start_of_next_microdesc(const char *s, const char *eos)
  * If <b>saved_location</b> isn't SAVED_IN_CACHE, make a local copy of each
  * If <b>saved_location</b> isn't SAVED_IN_CACHE, make a local copy of each
  * descriptor in the body field of each microdesc_t.
  * descriptor in the body field of each microdesc_t.
  *
  *
- * Return all newly
- * parsed microdescriptors in a newly allocated smartlist_t. */
+ * Return all newly parsed microdescriptors in a newly allocated
+ * smartlist_t. If <b>invalid_disgests_out</b> is provided, add a SHA256
+ * microdesc digest to it for every microdesc that we found to be badly
+ * formed. */
 smartlist_t *
 smartlist_t *
 microdescs_parse_from_string(const char *s, const char *eos,
 microdescs_parse_from_string(const char *s, const char *eos,
                              int allow_annotations,
                              int allow_annotations,
-                             saved_location_t where)
+                             saved_location_t where,
+                             smartlist_t *invalid_digests_out)
 {
 {
   smartlist_t *tokens;
   smartlist_t *tokens;
   smartlist_t *result;
   smartlist_t *result;
@@ -4033,16 +4067,12 @@ microdescs_parse_from_string(const char *s, const char *eos,
   tokens = smartlist_new();
   tokens = smartlist_new();
 
 
   while (s < eos) {
   while (s < eos) {
+    int okay = 0;
+
     start_of_next_microdesc = find_start_of_next_microdesc(s, eos);
     start_of_next_microdesc = find_start_of_next_microdesc(s, eos);
     if (!start_of_next_microdesc)
     if (!start_of_next_microdesc)
       start_of_next_microdesc = eos;
       start_of_next_microdesc = eos;
 
 
-    if (tokenize_string(area, s, start_of_next_microdesc, tokens,
-                        microdesc_token_table, flags)) {
-      log_warn(LD_DIR, "Unparseable microdescriptor");
-      goto next;
-    }
-
     md = tor_malloc_zero(sizeof(microdesc_t));
     md = tor_malloc_zero(sizeof(microdesc_t));
     {
     {
       const char *cp = tor_memstr(s, start_of_next_microdesc-s,
       const char *cp = tor_memstr(s, start_of_next_microdesc-s,
@@ -4057,6 +4087,13 @@ microdescs_parse_from_string(const char *s, const char *eos,
         md->body = (char*)cp;
         md->body = (char*)cp;
       md->off = cp - start;
       md->off = cp - start;
     }
     }
+    crypto_digest256(md->digest, md->body, md->bodylen, DIGEST_SHA256);
+
+    if (tokenize_string(area, s, start_of_next_microdesc, tokens,
+                        microdesc_token_table, flags)) {
+      log_warn(LD_DIR, "Unparseable microdescriptor");
+      goto next;
+    }
 
 
     if ((tok = find_opt_by_keyword(tokens, A_LAST_LISTED))) {
     if ((tok = find_opt_by_keyword(tokens, A_LAST_LISTED))) {
       if (parse_iso_time(tok->args[0], &md->last_listed)) {
       if (parse_iso_time(tok->args[0], &md->last_listed)) {
@@ -4113,12 +4150,15 @@ microdescs_parse_from_string(const char *s, const char *eos,
       md->ipv6_exit_policy = parse_short_policy(tok->args[0]);
       md->ipv6_exit_policy = parse_short_policy(tok->args[0]);
     }
     }
 
 
-    crypto_digest256(md->digest, md->body, md->bodylen, DIGEST_SHA256);
-
     smartlist_add(result, md);
     smartlist_add(result, md);
+    okay = 1;
 
 
     md = NULL;
     md = NULL;
   next:
   next:
+    if (! okay && invalid_digests_out) {
+      smartlist_add(invalid_digests_out,
+                    tor_memdup(md->digest, DIGEST256_LEN));
+    }
     microdesc_free(md);
     microdesc_free(md);
     md = NULL;
     md = NULL;
 
 

+ 8 - 4
src/or/routerparse.h

@@ -29,14 +29,17 @@ int router_parse_list_from_string(const char **s, const char *eos,
                                   saved_location_t saved_location,
                                   saved_location_t saved_location,
                                   int is_extrainfo,
                                   int is_extrainfo,
                                   int allow_annotations,
                                   int allow_annotations,
-                                  const char *prepend_annotations);
+                                  const char *prepend_annotations,
+                                  smartlist_t *invalid_digests_out);
 
 
 routerinfo_t *router_parse_entry_from_string(const char *s, const char *end,
 routerinfo_t *router_parse_entry_from_string(const char *s, const char *end,
                                              int cache_copy,
                                              int cache_copy,
                                              int allow_annotations,
                                              int allow_annotations,
-                                             const char *prepend_annotations);
+                                             const char *prepend_annotations,
+                                             int *can_dl_again_out);
 extrainfo_t *extrainfo_parse_entry_from_string(const char *s, const char *end,
 extrainfo_t *extrainfo_parse_entry_from_string(const char *s, const char *end,
-                         int cache_copy, struct digest_ri_map_t *routermap);
+                             int cache_copy, struct digest_ri_map_t *routermap,
+                             int *can_dl_again_out);
 MOCK_DECL(addr_policy_t *, router_parse_addr_policy_item_from_string,
 MOCK_DECL(addr_policy_t *, router_parse_addr_policy_item_from_string,
     (const char *s, int assume_action));
     (const char *s, int assume_action));
 version_status_t tor_version_is_obsolete(const char *myversion,
 version_status_t tor_version_is_obsolete(const char *myversion,
@@ -60,7 +63,8 @@ ns_detached_signatures_t *networkstatus_parse_detached_signatures(
 
 
 smartlist_t *microdescs_parse_from_string(const char *s, const char *eos,
 smartlist_t *microdescs_parse_from_string(const char *s, const char *eos,
                                           int allow_annotations,
                                           int allow_annotations,
-                                          saved_location_t where);
+                                          saved_location_t where,
+                                          smartlist_t *invalid_digests_out);
 
 
 authority_cert_t *authority_cert_parse_from_string(const char *s,
 authority_cert_t *authority_cert_parse_from_string(const char *s,
                                                    const char **end_of_string);
                                                    const char **end_of_string);

+ 2 - 2
src/test/test_dir.c

@@ -186,7 +186,7 @@ test_dir_formats(void *arg)
   buf = router_dump_router_to_string(r1, pk2);
   buf = router_dump_router_to_string(r1, pk2);
   tt_assert(buf);
   tt_assert(buf);
   cp = buf;
   cp = buf;
-  rp1 = router_parse_entry_from_string((const char*)cp,NULL,1,0,NULL);
+  rp1 = router_parse_entry_from_string((const char*)cp,NULL,1,0,NULL,NULL);
   tt_assert(rp1);
   tt_assert(rp1);
   tt_int_op(rp1->addr,==, r1->addr);
   tt_int_op(rp1->addr,==, r1->addr);
   tt_int_op(rp1->or_port,==, r1->or_port);
   tt_int_op(rp1->or_port,==, r1->or_port);
@@ -231,7 +231,7 @@ test_dir_formats(void *arg)
 
 
   buf = router_dump_router_to_string(r2, pk1);
   buf = router_dump_router_to_string(r2, pk1);
   cp = buf;
   cp = buf;
-  rp2 = router_parse_entry_from_string((const char*)cp,NULL,1,0,NULL);
+  rp2 = router_parse_entry_from_string((const char*)cp,NULL,1,0,NULL,NULL);
   tt_assert(rp2);
   tt_assert(rp2);
   tt_int_op(rp2->addr,==, r2->addr);
   tt_int_op(rp2->addr,==, r2->addr);
   tt_int_op(rp2->or_port,==, r2->or_port);
   tt_int_op(rp2->or_port,==, r2->or_port);

+ 1 - 1
src/test/test_microdesc.c

@@ -367,7 +367,7 @@ test_md_generate(void *arg)
   microdesc_t *md = NULL;
   microdesc_t *md = NULL;
   (void)arg;
   (void)arg;
 
 
-  ri = router_parse_entry_from_string(test_ri, NULL, 0, 0, NULL);
+  ri = router_parse_entry_from_string(test_ri, NULL, 0, 0, NULL, NULL);
   tt_assert(ri);
   tt_assert(ri);
   md = dirvote_create_microdescriptor(ri, 8);
   md = dirvote_create_microdescriptor(ri, 8);
   tt_str_op(md->body, ==, test_md_8);
   tt_str_op(md->body, ==, test_md_8);