Prechádzať zdrojové kódy

More gracefully handle corrupt state files.

Save a backup if we get odd circuitbuildtimes and other state info.

In the case of circuit build times, we no longer assert, and reset our state.
Mike Perry 14 rokov pred
rodič
commit
a9edb0b4f6
3 zmenil súbory, kde vykonal 69 pridanie a 31 odobranie
  1. 2 0
      changes/cbt-bugfixes
  2. 23 4
      src/or/circuitbuild.c
  3. 44 27
      src/or/config.c

+ 2 - 0
changes/cbt-bugfixes

@@ -29,5 +29,7 @@
      parameter and via a LearnCircuitBuildTimeout config option. Also
      automatically disable circuit build time calculation if we are either
      a AuthoritativeDirectory, or if we fail to write our state file. Bug 1296.
+   - More gracefully handle corrupt state files, removing asserts in favor
+     of saving a backup and resetting state.
 
 

+ 23 - 4
src/or/circuitbuild.c

@@ -193,12 +193,11 @@ circuit_build_times_new_consensus_params(circuit_build_times_t *cbt,
   int32_t num = networkstatus_get_param(ns, "cbtrecentcount",
                    CBT_DEFAULT_RECENT_CIRCUITS);
 
-  if (num != cbt->liveness.num_recent_circs) {
+  if (num > 0 && num != cbt->liveness.num_recent_circs) {
     int8_t *recent_circs;
     log_notice(LD_CIRC, "Changing recent timeout size from %d to %d",
                cbt->liveness.num_recent_circs, num);
 
-    tor_assert(num > 0);
     tor_assert(cbt->liveness.timeouts_after_firsthop);
 
     /*
@@ -675,6 +674,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
             "Corrupt state file? Build times count mismatch. "
             "Read %d times, but file says %d", loaded_cnt,
             state->TotalBuildTimes);
+    *msg = tor_strdup("Build times count mismatch.");
+    circuit_build_times_reset(cbt);
+    tor_free(loaded_times);
+    return -1;
   }
 
   circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
@@ -688,8 +691,18 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
   log_info(LD_CIRC,
            "Loaded %d/%d values from %d lines in circuit time histogram",
            tot_values, cbt->total_build_times, N);
-  tor_assert(cbt->total_build_times == tot_values);
-  tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE);
+
+  if (cbt->total_build_times != tot_values
+        || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) {
+    log_warn(LD_CIRC,
+            "Corrupt state file? Shuffled build times mismatch. "
+            "Read %d times, but file says %d", tot_values,
+            state->TotalBuildTimes);
+    *msg = tor_strdup("Build times count mismatch.");
+    circuit_build_times_reset(cbt);
+    tor_free(loaded_times);
+    return -1;
+  }
 
   circuit_build_times_set_timeout(cbt);
 
@@ -742,6 +755,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
     n++;
   }
 
+  /*
+   * We are erring and asserting here because this can only happen
+   * in codepaths other than startup. The startup state parsing code
+   * performs this same check, and resets state if it hits it. If we
+   * hit it at runtime, something serious has gone wrong.
+   */
   if (n!=cbt->total_build_times) {
     log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
             cbt->total_build_times);

+ 44 - 27
src/or/config.c

@@ -4837,25 +4837,63 @@ or_state_validate(or_state_t *old_state, or_state_t *state,
 }
 
 /** Replace the current persistent state with <b>new_state</b> */
-static void
+static int
 or_state_set(or_state_t *new_state)
 {
   char *err = NULL;
+  int ret = 0;
   tor_assert(new_state);
   config_free(&state_format, global_state);
   global_state = new_state;
   if (entry_guards_parse_state(global_state, 1, &err)<0) {
     log_warn(LD_GENERAL,"%s",err);
     tor_free(err);
+    ret = -1;
   }
   if (rep_hist_load_state(global_state, &err)<0) {
     log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err);
     tor_free(err);
+    ret = -1;
   }
   if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) {
     log_warn(LD_GENERAL,"%s",err);
     tor_free(err);
+    ret = -1;
   }
+  return ret;
+}
+
+/**
+ * Save a broken state file to a backup location.
+ */
+static void
+or_state_save_broken(char *fname)
+{
+  int i;
+  file_status_t status;
+  size_t len = strlen(fname)+16;
+  char *fname2 = tor_malloc(len);
+  for (i = 0; i < 100; ++i) {
+    tor_snprintf(fname2, len, "%s.%d", fname, i);
+    status = file_status(fname2);
+    if (status == FN_NOENT)
+      break;
+  }
+  if (i == 100) {
+    log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
+             "state files to move aside. Discarding the old state file.",
+             fname);
+    unlink(fname);
+  } else {
+    log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
+             "to \"%s\".  This could be a bug in Tor; please tell "
+             "the developers.", fname, fname2);
+    if (rename(fname, fname2) < 0) {
+      log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
+               "OS gave an error of %s", strerror(errno));
+    }
+  }
+  tor_free(fname2);
 }
 
 /** Reload the persistent state from disk, generating a new state as needed.
@@ -4917,31 +4955,8 @@ or_state_load(void)
              " This is a bug in Tor.");
     goto done;
   } else if (badstate && contents) {
-    int i;
-    file_status_t status;
-    size_t len = strlen(fname)+16;
-    char *fname2 = tor_malloc(len);
-    for (i = 0; i < 100; ++i) {
-      tor_snprintf(fname2, len, "%s.%d", fname, i);
-      status = file_status(fname2);
-      if (status == FN_NOENT)
-        break;
-    }
-    if (i == 100) {
-      log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
-               "state files to move aside. Discarding the old state file.",
-               fname);
-      unlink(fname);
-    } else {
-      log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
-               "to \"%s\".  This could be a bug in Tor; please tell "
-               "the developers.", fname, fname2);
-      if (rename(fname, fname2) < 0) {
-        log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
-                 "OS gave an error of %s", strerror(errno));
-      }
-    }
-    tor_free(fname2);
+    or_state_save_broken(fname);
+
     tor_free(contents);
     config_free(&state_format, new_state);
 
@@ -4953,7 +4968,9 @@ or_state_load(void)
   } else {
     log_info(LD_GENERAL, "Initialized state");
   }
-  or_state_set(new_state);
+  if (or_state_set(new_state) == -1) {
+    or_state_save_broken(fname);
+  }
   new_state = NULL;
   if (!contents) {
     global_state->next_write = 0;