Browse Source

Write a bunch of module documentation.

This commit adds or improves the module-level documentation for:

  buffers.c circuitstats.c command.c connection_edge.c control.c
  cpuworker.c crypto_curve25519.c crypto_curve25519.h
  crypto_ed25519.c crypto_format.c dircollate.c dirserv.c dns.c
  dns_structs.h fp_pair.c geoip.c hibernate.c keypin.c ntmain.c
  onion.c onion_fast.c onion_ntor.c onion_tap.c periodic.c
  protover.c protover.h reasons.c rephist.c replaycache.c
  routerlist.c routerparse.c routerset.c statefile.c status.c
  tor_main.c workqueue.c

In particular, I've tried to explain (for each documented module)
what each module does, what's in it, what the big idea is, why it
belongs in Tor, and who calls it.  In a few cases, I've added TODO
notes about refactoring opportunities.

I've also renamed an argument, and fixed a few DOCDOC comments.
Nick Mathewson 7 years ago
parent
commit
aae034d13e

+ 48 - 4
src/common/crypto_curve25519.c

@@ -5,6 +5,14 @@
  * \file crypto_curve25519.c
  *
  * \brief Wrapper code for a curve25519 implementation.
+ *
+ * Curve25519 is an Elliptic-Curve Diffie Hellman handshake, designed by
+ * Dan Bernstein.  For more information, see https://cr.yp.to/ecdh.html
+ *
+ * Tor uses Curve25519 as the basis of its "ntor" circuit extension
+ * handshake, and in related code.  The functions in this module are
+ * used to find the most suitable available Curve25519 implementation,
+ * to provide wrappers around it, and so on.
  */
 
 #define CRYPTO_CURVE25519_PRIVATE
@@ -39,15 +47,23 @@ int curve25519_donna(uint8_t *mypublic,
 
 static void pick_curve25519_basepoint_impl(void);
 
+/** This is set to 1 if we have an optimized Ed25519-based
+ * implementation for multiplying a value by the basepoint; to 0 if we
+ * don't, and to -1 if we haven't checked. */
 static int curve25519_use_ed = -1;
 
+/**
+ * Helper function: call the most appropriate backend to compute the
+ * scalar "secret" times the point "point".  Store the result in
+ * "output".  Return 0 on success, negative on failure.
+ **/
 STATIC int
 curve25519_impl(uint8_t *output, const uint8_t *secret,
-                const uint8_t *basepoint)
+                const uint8_t *point)
 {
   uint8_t bp[CURVE25519_PUBKEY_LEN];
   int r;
-  memcpy(bp, basepoint, CURVE25519_PUBKEY_LEN);
+  memcpy(bp, point, CURVE25519_PUBKEY_LEN);
   /* Clear the high bit, in case our backend foolishly looks at it. */
   bp[31] &= 0x7f;
 #ifdef USE_CURVE25519_DONNA
@@ -61,6 +77,11 @@ curve25519_impl(uint8_t *output, const uint8_t *secret,
   return r;
 }
 
+/**
+ * Helper function: Multiply the scalar "secret" by the Curve25519
+ * basepoint (X=9), and store the result in "output".  Return 0 on
+ * success, -1 on failure.
+ */
 STATIC int
 curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret)
 {
@@ -85,6 +106,10 @@ curve25519_basepoint_impl(uint8_t *output, const uint8_t *secret)
   return r;
 }
 
+/**
+ * Override the decision of whether to use the Ed25519-based basepoint
+ * multiply function.  Used for testing.
+ */
 void
 curve25519_set_impl_params(int use_ed)
 {
@@ -142,6 +167,10 @@ curve25519_secret_key_generate(curve25519_secret_key_t *key_out,
   return 0;
 }
 
+/**
+ * Given a secret key in <b>seckey</b>, create the corresponding public
+ * key in <b>key_out</b>.
+ */
 void
 curve25519_public_key_generate(curve25519_public_key_t *key_out,
                                const curve25519_secret_key_t *seckey)
@@ -149,6 +178,11 @@ curve25519_public_key_generate(curve25519_public_key_t *key_out,
   curve25519_basepoint_impl(key_out->public_key, seckey->secret_key);
 }
 
+/**
+ * Construct a new keypair in *<b>keypair_out</b>. If <b>extra_strong</b>
+ * is true, this key is possibly going to get used more than once, so
+ * use a better-than-usual RNG. Return 0 on success, -1 on failure.
+ */
 int
 curve25519_keypair_generate(curve25519_keypair_t *keypair_out,
                             int extra_strong)
@@ -159,7 +193,13 @@ curve25519_keypair_generate(curve25519_keypair_t *keypair_out,
   return 0;
 }
 
-/* DOCDOC */
+/** Store the keypair <b>keypair</b>, including its secret and public
+ * parts, to the file <b>fname</b>.  Use the string tag <b>tag</b> to
+ * distinguish this from other Curve25519 keypairs. Return 0 on success,
+ * -1 on failure.
+ *
+ * See crypto_write_tagged_contents_to_file() for more information on
+ * the metaformat used for these keys.*/
 int
 curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair,
                                  const char *fname,
@@ -182,7 +222,10 @@ curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair,
   return r;
 }
 
-/* DOCDOC */
+/** Read a curve25519 keypair from a file named <b>fname</b> created by
+ * curve25519_keypair_write_to_file(). Store the keypair in
+ * <b>keypair_out</b>, and the associated tag string in <b>tag_out</b>.
+ * Return 0 on success, and -1 on failure. */
 int
 curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out,
                                   char **tag_out,
@@ -197,6 +240,7 @@ curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out,
   if (len != sizeof(content))
     goto end;
 
+  /* Make sure that the public key matches the secret key */
   memcpy(keypair_out->seckey.secret_key, content, CURVE25519_SECKEY_LEN);
   curve25519_public_key_generate(&keypair_out->pubkey, &keypair_out->seckey);
   if (tor_memneq(keypair_out->pubkey.public_key,

+ 10 - 2
src/common/crypto_curve25519.h

@@ -14,12 +14,20 @@
 /** Length of the result of a curve25519 handshake. */
 #define CURVE25519_OUTPUT_LEN 32
 
-/** Wrapper type for a curve25519 public key */
+/** Wrapper type for a curve25519 public key.
+ *
+ *  (We define a separate type for these to make it less likely that we'll
+ *  mistake them for secret keys.)
+ * */
 typedef struct curve25519_public_key_t {
   uint8_t public_key[CURVE25519_PUBKEY_LEN];
 } curve25519_public_key_t;
 
-/** Wrapper type for a curve25519 secret key */
+/** Wrapper type for a curve25519 secret key
+ *
+ * (We define a separate type for these to make it less likely that we'll
+ * mistake them for public keys.)
+ **/
 typedef struct curve25519_secret_key_t {
   uint8_t secret_key[CURVE25519_SECKEY_LEN];
 } curve25519_secret_key_t;

+ 28 - 1
src/common/crypto_ed25519.c

@@ -5,6 +5,14 @@
  * \file crypto_ed25519.c
  *
  * \brief Wrapper code for an ed25519 implementation.
+ *
+ * Ed25519 is a Schnorr signature on a Twisted Edwards curve, defined
+ * by Dan Bernstein. For more information, see https://ed25519.cr.yp.to/
+ *
+ * This module wraps our choice of Ed25519 backend, and provides a few
+ * convenience functions for checking and generating signatures.  It also
+ * provides Tor-specific tools for key blinding and for converting Ed25519
+ * keys to and from the corresponding Curve25519 keys.
  */
 
 #include "orconfig.h"
@@ -28,7 +36,7 @@
 static void pick_ed25519_impl(void);
 static int ed25519_impl_spot_check(void);
 
-/** An Ed25519 implementation */
+/** An Ed25519 implementation, as a set of function pointers. */
 typedef struct {
   int (*selftest)(void);
 
@@ -53,6 +61,8 @@ typedef struct {
                                        int);
 } ed25519_impl_t;
 
+/** The Ref10 Ed25519 implementation. This one is pure C and lightly
+ * optimized. */
 static const ed25519_impl_t impl_ref10 = {
   NULL,
 
@@ -71,6 +81,8 @@ static const ed25519_impl_t impl_ref10 = {
   ed25519_ref10_pubkey_from_curve25519_pubkey,
 };
 
+/** The Donna Ed25519 implementation. This one is heavily optimized, but still
+ * mostly C. The C still tends to be heavily platform-specific. */
 static const ed25519_impl_t impl_donna = {
   ed25519_donna_selftest,
 
@@ -89,8 +101,15 @@ static const ed25519_impl_t impl_donna = {
   ed25519_donna_pubkey_from_curve25519_pubkey,
 };
 
+/** Which Ed25519 implementation are we using?  NULL if we haven't decided
+ * yet. */
 static const ed25519_impl_t *ed25519_impl = NULL;
 
+/** Helper: Return our chosen Ed25519 implementation.
+ *
+ * This should only be called after we've picked an implementation, but
+ * it _does_ recover if you forget this.
+ **/
 static inline const ed25519_impl_t *
 get_ed_impl(void)
 {
@@ -101,7 +120,12 @@ get_ed_impl(void)
 }
 
 #ifdef TOR_UNIT_TESTS
+/** For testing: used to remember our actual choice of Ed25519
+ * implementation */
 static const ed25519_impl_t *saved_ed25519_impl = NULL;
+/** For testing: Use the Ed25519 implementation called <b>name</b> until
+ * crypto_ed25519_testing_restore_impl is called.  Recognized names are
+ * "donna" and "ref10". */
 void
 crypto_ed25519_testing_force_impl(const char *name)
 {
@@ -114,6 +138,9 @@ crypto_ed25519_testing_force_impl(const char *name)
     ed25519_impl = &impl_ref10;
   }
 }
+/** For testing: go back to whatever Ed25519 implementation we had picked
+ * before crypto_ed25519_testing_force_impl was called.
+ */
 void
 crypto_ed25519_testing_restore_impl(void)
 {

+ 7 - 0
src/common/crypto_format.c

@@ -123,6 +123,10 @@ crypto_read_tagged_contents_from_file(const char *fname,
   return r;
 }
 
+/** Encode <b>pkey</b> as a base64-encoded string, without trailing "="
+ * characters, in the buffer <b>output</b>, which must have at least
+ * CURVE25519_BASE64_PADDED_LEN+1 bytes available.  Return 0 on success, -1 on
+ * failure. */
 int
 curve25519_public_to_base64(char *output,
                             const curve25519_public_key_t *pkey)
@@ -135,6 +139,9 @@ curve25519_public_to_base64(char *output,
   return 0;
 }
 
+/** Try to decode a base64-encoded curve25519 public key from <b>input</b>
+ * into the object at <b>pkey</b>. Return 0 on success, -1 on failure.
+ * Accepts keys with or without a trailing "=". */
 int
 curve25519_public_from_base64(curve25519_public_key_t *pkey,
                               const char *input)

+ 14 - 0
src/common/workqueue.c

@@ -6,6 +6,20 @@
  *
  * \brief Implements worker threads, queues of work for them, and mechanisms
  * for them to send answers back to the main thread.
+ *
+ * The main structure here is a threadpool_t : it manages a set of worker
+ * threads, a queue of pending work, and a reply queue.  Every piece of work
+ * is a workqueue_entry_t, containing data to process and a function to
+ * process it with.
+ *
+ * The main thread informs the worker threads of pending work by using a
+ * condition variable.  The workers inform the main process of completed work
+ * by using an alert_sockets_t object, as implemented in compat_threads.c.
+ *
+ * The main thread can also queue an "update" that will be handled by all the
+ * workers.  This is useful for updating state that all the workers share.
+ *
+ * In Tor today, there is currently only one thread pool, used in cpuworker.c.
  */
 
 #include "orconfig.h"

+ 16 - 4
src/or/buffers.c

@@ -6,10 +6,22 @@
 
 /**
  * \file buffers.c
- * \brief Implements a generic interface buffer.  Buffers are
- * fairly opaque string holders that can read to or flush from:
- * memory, file descriptors, or TLS connections.  Buffers are implemented
- * as linked lists of memory chunks.
+ * \brief Implements a generic buffer interface.
+ *
+ * A buf_t is a (fairly) opaque byte-oriented FIFO that can read to or flush
+ * from memory, sockets, file descriptors, TLS connections, or another buf_t.
+ * Buffers are implemented as linked lists of memory chunks.
+ *
+ * All socket-backed and TLS-based connection_t objects have a pair of
+ * buffers: one for incoming data, and one for outgoing data.  These are fed
+ * and drained from functions in connection.c, triggered by events that are
+ * monitored in main.c.
+ *
+ * This module has basic support for reading and writing on buf_t objects. It
+ * also contains specialized functions for handling particular protocols
+ * on a buf_t backend, including SOCKS (used in connection_edge.c), Tor cells
+ * (used in connection_or.c and channeltls.c), HTTP (used in directory.c), and
+ * line-oriented communication (used in control.c).
  **/
 #define BUFFERS_PRIVATE
 #include "or.h"

+ 12 - 1
src/or/circuitstats.c

@@ -9,6 +9,18 @@
  *
  * \brief Maintains and analyzes statistics about circuit build times, so we
  * can tell how long we may need to wait for a fast circuit to be constructed.
+ *
+ * By keeping these statistics, a client learns when it should time out a slow
+ * circuit for being too slow, and when it should keep a circuit open in order
+ * to wait for it to complete.
+ *
+ * The information here is kept in a circuit_build_times_t structure, which is
+ * currently a singleton, but doesn't need to be.  It's updated by calls to
+ * circuit_build_times_count_timeout() from circuituse.c,
+ * circuit_build_times_count_close() from circuituse.c, and
+ * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
+ * calls into this module, mostly from circuitlist.c.  Observations are
+ * persisted to disk via the or_state_t-related calls.
  */
 
 #define CIRCUITSTATS_PRIVATE
@@ -329,7 +341,6 @@ circuit_build_times_min_timeout(void)
               "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
               num);
   }
-
   return num;
 }
 

+ 20 - 0
src/or/command.c

@@ -7,6 +7,26 @@
 /**
  * \file command.c
  * \brief Functions for processing incoming cells.
+ *
+ * When we receive a cell from a client or a relay, it arrives on some
+ * channel, and tells us what to do with it. In this module, we dispatch based
+ * on the cell type using the functions command_process_cell() and
+ * command_process_var_cell(), and deal with the cell accordingly.  (These
+ * handlers are installed on a channel with the command_setup_channel()
+ * function.)
+ *
+ * Channels have a chance to handle some cell types on their own before they
+ * are ever passed here --- typically, they do this for cells that are
+ * specific to a given channel type.  For example, in channeltls.c, the cells
+ * for the initial connection handshake are handled before we get here.  (Of
+ * course, the fact that there _is_ only one channel type for now means that
+ * we may have gotten the factoring wrong here.)
+ *
+ * Handling other cell types is mainly farmed off to other modules, after
+ * initial sanity-checking.  CREATE* cells are handled ultimately in onion.c,
+ * CREATED* cells trigger circuit creation in circuitbuild.c, DESTROY cells
+ * are handled here (since they're simple), and RELAY cells, in all their
+ * complexity, are passed off to relay.c.
  **/
 
 /* In-points to command.c:

+ 45 - 0
src/or/connection_edge.c

@@ -7,6 +7,51 @@
 /**
  * \file connection_edge.c
  * \brief Handle edge streams.
+ *
+ * An edge_connection_t is a subtype of a connection_t, and represents two
+ * critical concepts in Tor: a stream, and an edge connection.  From the Tor
+ * protocol's point of view, a stream is a bi-directional channel that is
+ * multiplexed on a single circuit.  Each stream on a circuit is identified
+ * with a separate 16-bit stream ID, local to the (circuit,exit) pair.
+ * Streams are created in response to client requests.
+ *
+ * An edge connection is one thing that can implement a stream: it is either a
+ * TCP application socket that has arrived via (e.g.) a SOCKS request, or an
+ * exit connection.
+ *
+ * Not every instance of edge_connection_t truly represents an edge connection,
+ * however. (Sorry!) We also create edge_connection_t objects for streams that
+ * we will not be handling with TCP.  The types of these streams are:
+ *   <ul>
+ *   <li>DNS lookup streams, created on the client side in response to
+ *     a UDP DNS request received on a DNSPort, or a RESOLVE command
+ *     on a controller.
+ *   <li>DNS lookup streams, created on the exit side in response to
+ *     a RELAY_RESOLVE cell from a client.
+ *   <li>Tunneled directory streams, created on the directory cache side
+ *     in response to a RELAY_BEGINDIR cell.  These streams attach directly
+ *     to a dir_connection_t object without ever using TCP.
+ *   </ul>
+ *
+ * This module handles general-purpose functionality having to do with
+ * edge_connection_t.  On the client side, it accepts various types of
+ * application requests on SocksPorts, TransPorts, and NATDPorts, and
+ * creates streams appropriately.
+ *
+ * This module is also responsible for implementing stream isolation:
+ * ensuring that streams that should not be linkable to one another are
+ * kept to different circuits.
+ *
+ * On the exit side, this module handles the various stream-creating
+ * types of RELAY cells by launching appropriate outgoing connections,
+ * DNS requests, or directory connection objects.
+ *
+ * And for all edge connections, this module is responsible for handling
+ * incoming and outgoing data as it arrives or leaves in the relay.c
+ * module.  (Outgoing data will be packaged in
+ * connection_edge_process_inbuf() as it calls
+ * connection_edge_package_raw_inbuf(); incoming data from RELAY_DATA
+ * cells is applied in connection_edge_process_relay_cell().)
  **/
 #define CONNECTION_EDGE_PRIVATE
 

+ 25 - 1
src/or/control.c

@@ -5,7 +5,31 @@
 /**
  * \file control.c
  * \brief Implementation for Tor's control-socket interface.
- *   See doc/spec/control-spec.txt for full details on protocol.
+ *
+ * A "controller" is an external program that monitors and controls a Tor
+ * instance via a text-based protocol. It connects to Tor via a connection
+ * to a local socket.
+ *
+ * The protocol is line-driven.  The controller sends commands terminated by a
+ * CRLF.  Tor sends lines that are either <em>replies</em> to what the
+ * controller has said, or <em>events</em> that Tor sends to the controller
+ * asynchronously based on occurrences in the Tor network model.
+ *
+ * See the control-spec.txt file in the torspec.git repository for full
+ * details on protocol.
+ *
+ * This module generally has two kinds of entry points: those based on having
+ * received a command on a controller socket, which are handled in
+ * connection_control_process_inbuf(), and dispatched to individual functions
+ * with names like control_handle_COMMANDNAME(); and those based on events
+ * that occur elsewhere in Tor, which are handled by functions with names like
+ * control_event_EVENTTYPE().
+ *
+ * Controller events are not sent immediately; rather, they are inserted into
+ * the queued_control_events array, and flushed later from
+ * flush_queued_events_cb().  Doing this simplifies our callgraph greatly,
+ * by limiting the number of places in Tor that can call back into the network
+ * stack.
  **/
 
 #define CONTROL_PRIVATE

+ 5 - 1
src/or/cpuworker.c

@@ -8,7 +8,11 @@
  * \brief Uses the workqueue/threadpool code to farm CPU-intensive activities
  * out to subprocesses.
  *
- * Right now, we only use this for processing onionskins.
+ * The multithreading backend for this module is in workqueue.c; this module
+ * specializes workqueue.c.
+ *
+ * Right now, we only use this for processing onionskins, and invoke it mostly
+ * from onion.c.
  **/
 #include "or.h"
 #include "channel.h"

+ 30 - 4
src/or/dircollate.c

@@ -8,6 +8,17 @@
  *
  * \brief Collation code for figuring out which identities to vote for in
  *   the directory voting process.
+ *
+ * During the consensus calculation, when an authority is looking at the vote
+ * documents from all the authorities, it needs to compute the consensus for
+ * each relay listed by at least one authority.  But the notion of "each
+ * relay" can be tricky: some relays have Ed25519 keys, and others don't.
+ *
+ * Moreover, older consensus methods did RSA-based ID collation alone, and
+ * ignored Ed25519 keys.  We need to support those too until we're completely
+ * sure that authorities will never downgrade.
+ *
+ * This module is invoked exclusively from dirvote.c.
  */
 
 #define DIRCOLLATE_PRIVATE
@@ -21,6 +32,9 @@ static void dircollator_collate_by_ed25519(dircollator_t *dc);
  * RSA SHA1 digest) to an array of vote_routerstatus_t. */
 typedef struct ddmap_entry_s {
   HT_ENTRY(ddmap_entry_s) node;
+  /** A SHA1-RSA1024 identity digest and Ed25519 identity key,
+   * concatenated.  (If there is no ed25519 identity key, there is no
+   * entry in this table.) */
   uint8_t d[DIGEST_LEN + DIGEST256_LEN];
   /* The nth member of this array corresponds to the vote_routerstatus_t (if
    * any) received for this digest pair from the nth voter. */
@@ -43,12 +57,16 @@ ddmap_entry_new(int n_votes)
                          sizeof(vote_routerstatus_t *) * n_votes);
 }
 
+/** Helper: compute a hash of a single ddmap_entry_t's identity (or
+ * identities) */
 static unsigned
 ddmap_entry_hash(const ddmap_entry_t *ent)
 {
   return (unsigned) siphash24g(ent->d, sizeof(ent->d));
 }
 
+/** Helper: return true if <b>a</b> and <b>b</b> have the same
+ * identity/identities. */
 static unsigned
 ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b)
 {
@@ -56,7 +74,7 @@ ddmap_entry_eq(const ddmap_entry_t *a, const ddmap_entry_t *b)
 }
 
 /** Record the RSA identity of <b>ent</b> as <b>rsa_sha1</b>, and the
- * ed25519 identity as <b>ed25519</b>. */
+ * ed25519 identity as <b>ed25519</b>.  Both must be provided. */
 static void
 ddmap_entry_set_digests(ddmap_entry_t *ent,
                         const uint8_t *rsa_sha1,
@@ -72,8 +90,12 @@ HT_GENERATE2(double_digest_map, ddmap_entry_s, node, ddmap_entry_hash,
              ddmap_entry_eq, 0.6, tor_reallocarray, tor_free_)
 
 /** Helper: add a single vote_routerstatus_t <b>vrs</b> to the collator
- * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of
- * its RSA key digest and Ed25519 key.  */
+ * <b>dc</b>, indexing it by its RSA key digest, and by the 2-tuple of its RSA
+ * key digest and Ed25519 key.   It must come from the <b>vote_num</b>th
+ * vote.
+ *
+ * Requires that the vote is well-formed -- that is, that it has no duplicate
+ * routerstatus entries.  We already checked for that when parsing the vote. */
 static void
 dircollator_add_routerstatus(dircollator_t *dc,
                              int vote_num,
@@ -82,9 +104,12 @@ dircollator_add_routerstatus(dircollator_t *dc,
 {
   const char *id = vrs->status.identity_digest;
 
+  /* Clear this flag; we might set it later during the voting process */
   vrs->ed25519_reflects_consensus = 0;
 
-  (void) vote;
+  (void) vote; // We don't currently need this.
+
+  /* First, add this item to the appropriate RSA-SHA-Id array. */
   vote_routerstatus_t **vrs_lst = digestmap_get(dc->by_rsa_sha1, id);
   if (NULL == vrs_lst) {
     vrs_lst = tor_calloc(dc->n_votes, sizeof(vote_routerstatus_t *));
@@ -98,6 +123,7 @@ dircollator_add_routerstatus(dircollator_t *dc,
   if (! vrs->has_ed25519_listing)
     return;
 
+  /* Now add it to the appropriate <Ed,RSA-SHA-Id> array. */
   ddmap_entry_t search, *found;
   memset(&search, 0, sizeof(search));
   ddmap_entry_set_digests(&search, (const uint8_t *)id, ed);

+ 18 - 0
src/or/dirserv.c

@@ -36,6 +36,24 @@
  * \file dirserv.c
  * \brief Directory server core implementation. Manages directory
  * contents and generates directories.
+ *
+ * This module implements most of directory cache functionality, and some of
+ * the directory authority functionality.  The directory.c module delegates
+ * here in order to handle incoming requests from clients, via
+ * connection_dirserv_flushed_some() and its kin.  In order to save RAM, this
+ * module is responsible for spooling directory objects (in whole or in part)
+ * onto buf_t instances, and then closing the dir_connection_t once the
+ * objects are totally flushed.
+ *
+ * The directory.c module also delegates here for handling descriptor uploads
+ * via dirserv_add_multiple_descriptors().
+ *
+ * Additionally, this module handles some aspects of voting, including:
+ * deciding how to vote on individual flags (based on decisions reached in
+ * rephist.c), formatting routerstatus lines, and deciding what relays to
+ * include in an authority's vote.  (TODO: Those functions could profitably be
+ * split off.  They only live in this file because historically they were
+ * shared among the v1, v2, and v3 directory code.)
  */
 
 /** How far in the future do we allow a router to get? (seconds) */

+ 50 - 3
src/or/dns.c

@@ -9,6 +9,42 @@
  * This is implemented as a wrapper around Adam Langley's eventdns.c code.
  * (We can't just use gethostbyname() and friends because we really need to
  * be nonblocking.)
+ *
+ * There are three main cases when a Tor relay uses dns.c to launch a DNS
+ * request:
+ *   <ol>
+ *    <li>To check whether the DNS server is working more or less correctly.
+ *      This happens via dns_launch_correctness_checks().  The answer is
+ *      reported in the return value from later calls to
+ *      dns_seems_to_be_broken().
+ *    <li>When a client has asked the relay, in a RELAY_BEGIN cell, to connect
+ *      to a given server by hostname.  This happens via dns_resolve().
+ *    <li>When a client has asked the relay, in a RELAY_RESOLVE cell, to look
+ *      up a given server's IP address(es) by hostname. This also happens via
+ *      dns_resolve().
+ *   </ol>
+ *
+ * Each of these gets handled a little differently.
+ *
+ * To check for correctness, we look up some hostname we expect to exist and
+ * have real entries, some hostnames which we expect to definitely not exist,
+ * and some hostnames that we expect to probably not exist.  If too many of
+ * the hostnames that shouldn't exist do exist, that's a DNS hijacking
+ * attempt.  If too many of the hostnames that should exist have the same
+ * addresses as the ones that shouldn't exist, that's a very bad DNS hijacking
+ * attempt, or a very naughty captive portal.  And if the hostnames that
+ * should exist simply don't exist, we probably have a broken nameserver.
+ *
+ * To handle client requests, we first check our cache for answers. If there
+ * isn't something up-to-date, we've got to launch A or AAAA requests as
+ * appropriate.  How we handle responses to those in particular is a bit
+ * complex; see dns_lookup() and set_exitconn_info_from_resolve().
+ *
+ * When a lookup is finally complete, the inform_pending_connections()
+ * function will tell all of the streams that have been waiting for the
+ * resolve, by calling connection_exit_connect() if the client sent a
+ * RELAY_BEGIN cell, and by calling send_resolved_cell() or
+ * send_hostname_cell() if the client sent a RELAY_RESOLVE cell.
  **/
 
 #define DNS_PRIVATE
@@ -793,8 +829,14 @@ dns_resolve_impl,(edge_connection_t *exitconn, int is_resolve,
 }
 
 /** Given an exit connection <b>exitconn</b>, and a cached_resolve_t
- * <b>resolve</b> whose DNS lookups have all succeeded or failed, update the
- * appropriate fields (address_ttl and addr) of <b>exitconn</b>.
+ * <b>resolve</b> whose DNS lookups have all either succeeded or failed,
+ * update the appropriate fields (address_ttl and addr) of <b>exitconn</b>.
+ *
+ * The logic can be complicated here, since we might have launched both
+ * an A lookup and an AAAA lookup, and since either of those might have
+ * succeeded or failed, and since we want to answer a RESOLVE cell with
+ * a full answer but answer a BEGIN cell with whatever answer the client
+ * would accept <i>and</i> we could still connect to.
  *
  * If this is a reverse lookup, set *<b>hostname_out</b> to a newly allocated
  * copy of the name resulting hostname.
@@ -1137,7 +1179,12 @@ dns_found_answer(const char *address, uint8_t query_type,
 
 /** Given a pending cached_resolve_t that we just finished resolving,
  * inform every connection that was waiting for the outcome of that
- * resolution. */
+ * resolution.
+ *
+ * Do this by sending a RELAY_RESOLVED cell (if the pending stream had sent us
+ * a RELAY_RESOLVE cell), or by launching an exit connection (if the pending
+ * stream had sent us a RELAY_BEGIN cell).
+ */
 static void
 inform_pending_connections(cached_resolve_t *resolve)
 {

+ 12 - 0
src/or/dns_structs.h

@@ -1,3 +1,15 @@
+/* Copyright (c) 2003-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file dns_structs.h
+ *
+ * \brief Structures used in dns.c. Exposed to dns.c, and to the unit tests
+ * that declare DNS_PRIVATE.
+ */
+
 #ifndef TOR_DNS_STRUCTS_H
 #define TOR_DNS_STRUCTS_H
 

+ 8 - 0
src/or/fp_pair.c

@@ -7,6 +7,14 @@
  * \brief Manages data structures for associating pairs of fingerprints. Used
  * to handle combinations of identity/signing-key fingerprints for
  * authorities.
+ *
+ * This is a nice, simple, compact data structure module that handles a map
+ * from (signing key fingerprint, identity key fingerprint) to void *.  The
+ * fingerprints here are SHA1 digests of RSA keys.
+ *
+ * This structure is used in directory.c and in routerlist.c for handling
+ * authority certificates, since we never want more than a single
+ * certificate for any (ID key, signing key) pair.
  **/
 
 #include "or.h"

+ 18 - 0
src/or/geoip.c

@@ -7,6 +7,24 @@
  * to summarizing client connections by country to entry guards, bridges,
  * and directory servers; and for statistics on answering network status
  * requests.
+ *
+ * There are two main kinds of functions in this module: geoip functions,
+ * which map groups of IPv4 and IPv6 addresses to country codes, and
+ * statistical functions, which collect statistics about different kinds of
+ * per-country usage.
+ *
+ * The geoip lookup tables are implemented as sorted lists of disjoint address
+ * ranges, each mapping to a singleton geoip_country_t.  These country objects
+ * are also indexed by their names in a hashtable.
+ *
+ * The tables are populated from disk at startup by the geoip_load_file()
+ * function.  For more information on the file format they read, see that
+ * function.  See the scripts and the README file in src/config for more
+ * information about how those files are generated.
+ *
+ * Tor uses GeoIP information in order to implement user requests (such as
+ * ExcludeNodes {cc}), and to keep track of how much usage relays are getting
+ * for each country.
  */
 
 #define GEOIP_PRIVATE

+ 18 - 6
src/or/hibernate.c

@@ -8,6 +8,12 @@
  * etc in preparation for closing down or going dormant; and to track
  * bandwidth and time intervals to know when to hibernate and when to
  * stop hibernating.
+ *
+ * Ordinarily a Tor relay is "Live".
+ *
+ * A live relay can stop accepting connections for one of two reasons: either
+ * it is trying to conserve bandwidth because of bandwidth accounting rules
+ * ("soft hibernation"), or it is about to shut down ("exiting").
  **/
 
 /*
@@ -49,8 +55,10 @@ typedef enum {
   UNIT_MONTH=1, UNIT_WEEK=2, UNIT_DAY=3,
 } time_unit_t;
 
-/* Fields for accounting logic.  Accounting overview:
+/*
+ * @file hibernate.c
  *
+ * <h4>Accounting</h4>
  * Accounting is designed to ensure that no more than N bytes are sent in
  * either direction over a given interval (currently, one month, one week, or
  * one day) We could
@@ -64,17 +72,21 @@ typedef enum {
  *
  * Each interval runs as follows:
  *
- * 1. We guess our bandwidth usage, based on how much we used
+ * <ol>
+ * <li>We guess our bandwidth usage, based on how much we used
  *     last time.  We choose a "wakeup time" within the interval to come up.
- * 2. Until the chosen wakeup time, we hibernate.
- * 3. We come up at the wakeup time, and provide bandwidth until we are
+ * <li>Until the chosen wakeup time, we hibernate.
+ * <li> We come up at the wakeup time, and provide bandwidth until we are
  *    "very close" to running out.
- * 4. Then we go into low-bandwidth mode, and stop accepting new
+ * <li> Then we go into low-bandwidth mode, and stop accepting new
  *    connections, but provide bandwidth until we run out.
- * 5. Then we hibernate until the end of the interval.
+ * <li> Then we hibernate until the end of the interval.
  *
  * If the interval ends before we run out of bandwidth, we go back to
  * step one.
+ *
+ * Accounting is controlled by the AccountingMax, AccountingRule, and
+ * AccountingStart options.
  */
 
 /** How many bytes have we read in this accounting interval? */

+ 12 - 0
src/or/keypin.c

@@ -39,16 +39,28 @@
  * @brief Key-pinning for RSA and Ed25519 identity keys at directory
  *  authorities.
  *
+ * Many older clients, and many internal interfaces, still refer to relays by
+ * their RSA1024 identity keys.  We can make this more secure, however:
+ * authorities use this module to track which RSA keys have been used along
+ * with which Ed25519 keys, and force such associations to be permanent.
+ *
  * This module implements a key-pinning mechanism to ensure that it's safe
  * to use RSA keys as identifiers even as we migrate to Ed25519 keys.  It
  * remembers, for every RSA key we've seen, what the associated Ed25519
  * key is.  This way, if we see a different Ed25519 key with that RSA key,
  * we'll know that there's a mismatch.
  *
+ * (As of this writing, these key associations are advisory only, mostly
+ * because some relay operators kept mishandling their Ed25519 keys during
+ * the initial Ed25519 rollout.  We should fix this problem, and then toggle
+ * the AuthDirPinKeys option.)
+ *
  * We persist these entries to disk using a simple format, where each line
  * has a base64-encoded RSA SHA1 hash, then a base64-encoded Ed25519 key.
  * Empty lines, malformed lines, and lines beginning with # are
  * ignored. Lines beginning with @ are reserved for future extensions.
+ *
+ * The dirserv.c module is the main user of these functions.
  */
 
 static int keypin_journal_append_entry(const uint8_t *rsa_id_digest,

+ 9 - 1
src/or/ntmain.c

@@ -6,7 +6,15 @@
 /**
  * \file ntmain.c
  *
- * \brief Entry points for running/configuring Tor as Windows Service.
+ * \brief Entry points for running/configuring Tor as a Windows Service.
+ *
+ * Windows Services expect to be registered with the operating system, and to
+ * have entry points for starting, stopping, and monitoring them.  This module
+ * implements those entry points so that a tor relay or client or hidden
+ * service can run as a Windows service.  Therefore, this module
+ * is only compiled when building for Windows.
+ *
+ * Warning: this module is not very well tested or very well maintained.
  */
 
 #ifdef _WIN32

+ 52 - 0
src/or/onion.c

@@ -8,6 +8,58 @@
  * \file onion.c
  * \brief Functions to queue create cells, wrap the various onionskin types,
  * and parse and create the CREATE cell and its allies.
+ *
+ * This module has a few functions, all related to the CREATE/CREATED
+ * handshake that we use on links in order to create a circuit, and the
+ * related EXTEND/EXTENDED handshake that we use over circuits in order to
+ * extend them an additional hop.
+ *
+ * In this module, we provide a set of abstractions to create a uniform
+ * interface over the three circuit extension handshakes that Tor has used
+ * over the years (TAP, CREATE_FAST, and ntor).  These handshakes are
+ * implemented in onion_tap.c, onion_fast.c, and onion_ntor.c respectively.
+ *
+ * All[*] of these handshakes follow a similar pattern: a client, knowing
+ * some key from the relay it wants to extend through, generates the
+ * first part of a handshake. A relay receives that handshake, and sends
+ * a reply.  Once the client handles the reply, it knows that it is
+ * talking to the right relay, and it shares some freshly negotiated key
+ * material with that relay.
+ *
+ * We sometimes call the client's part of the handshake an "onionskin".
+ * We do this because historically, Onion Routing used a multi-layer
+ * structure called an "onion" to construct circuits. Each layer of the
+ * onion contained key material chosen by the client, the identity of
+ * the next relay in the circuit, and a smaller onion, encrypted with
+ * the key of the next relay.  When we changed Tor to use a telescoping
+ * circuit extension design, it corresponded to sending each layer of the
+ * onion separately -- as a series of onionskins.
+ *
+ * Clients invoke these functions when creating or extending a circuit,
+ * from circuitbuild.c.
+ *
+ * Relays invoke these functions when they receive a CREATE or EXTEND
+ * cell in command.c or relay.c, in order to queue the pending request.
+ * They also invoke them from cpuworker.c, which handles dispatching
+ * onionskin requests to different worker threads.
+ *
+ * <br>
+ *
+ * This module also handles:
+ *  <ul>
+ *  <li> Queueing incoming onionskins on the relay side before passing
+ *      them to worker threads.
+ *   <li>Expiring onionskins on the relay side if they have waited for
+ *     too long.
+ *   <li>Packaging private keys on the server side in order to pass
+ *     them to worker threads.
+ *   <li>Encoding and decoding CREATE, CREATED, CREATE2, and CREATED2 cells.
+ *   <li>Encoding and decoding EXTEND, EXTENDED, EXTEND2, and EXTENDED2
+ *    relay cells.
+ * </ul>
+ *
+ * [*] The CREATE_FAST handshake is weaker than described here; see
+ * onion_fast.c for more information.
  **/
 
 #include "or.h"

+ 18 - 0
src/or/onion_fast.c

@@ -7,6 +7,24 @@
 /**
  * \file onion_fast.c
  * \brief Functions to implement the CREATE_FAST circuit handshake.
+ *
+ * The "CREATE_FAST" handshake is an unauthenticated, non-forward-secure
+ * key derivation mechanism based on SHA1.  We used to use it for the
+ * first hop of each circuit, since the TAP handshake provided no
+ * additional security beyond the security already provided by the TLS
+ * handshake [*].
+ *
+ * When we switched to ntor, we deprecated CREATE_FAST, since ntor is
+ * stronger than our TLS handshake was, and fast enough to not be worrisome.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
+ *
+ * [*]Actually, it's possible that TAP _was_ a little better than TLS with
+ * RSA1024 certificates and EDH1024 for forward secrecy, if you
+ * hypothesize an adversary who can compute discrete logarithms on a
+ * small number of targeted DH1024 fields, but who can't break all that
+ * many RSA1024 keys.
  **/
 
 #include "or.h"

+ 11 - 0
src/or/onion_ntor.c

@@ -5,6 +5,17 @@
  * \file onion_ntor.c
  *
  * \brief Implementation for the ntor handshake.
+ *
+ * The ntor circuit-extension handshake was developed as a replacement
+ * for the old "TAP" handshake.  It uses Elliptic-curve Diffie-Hellman and
+ * a hash function in order to perform a one-way authenticated key
+ * exchange.
+ *
+ * We instantiate ntor with curve25519, HMAC-SHA256, and HKDF.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
  */
 
 #include "orconfig.h"

+ 12 - 0
src/or/onion_tap.c

@@ -9,10 +9,22 @@
  * \brief Functions to implement the original Tor circuit extension handshake
  * (a.k.a TAP).
  *
+ * The "TAP" handshake is the first one that was widely used in Tor: It
+ * combines RSA1024-OAEP and AES128-CTR to perform a hybrid encryption over
+ * the first message of the DH1024 key exchange.  (The RSA-encrypted part
+ * of the encryption is authenticated; the AES-encrypted part isn't. This
+ * was not a smart choice.)
+ *
  * We didn't call it "TAP" ourselves -- Ian Goldberg named it in "On the
  * Security of the Tor Authentication Protocol".  (Spoiler: it's secure, but
  * its security is kind of fragile and implementation dependent.  Never modify
  * this implementation without reading and understanding that paper at least.)
+ *
+ * We have deprecated TAP since the ntor handshake came into general use.  It
+ * is still used for hidden service IP and RP connections, however.
+ *
+ * This handshake, like the other circuit-extension handshakes, is
+ * invoked from onion.c.
  **/
 
 #include "or.h"

+ 4 - 0
src/or/periodic.c

@@ -5,6 +5,10 @@
  * \file periodic.c
  *
  * \brief Generic backend for handling periodic events.
+ *
+ * The events in this module are used by main.c to track items that need
+ * to fire once every N seconds, possibly picking a new interval each time
+ * that they fire.  See periodic_events[] in main.c for examples.
  */
 
 #include "or.h"

+ 24 - 0
src/or/protover.c

@@ -1,3 +1,24 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file protover.c
+ * \brief Versioning information for different pieces of the Tor protocol.
+ *
+ * Starting in version 0.2.9.3-alpha, Tor places separate version numbers on
+ * each of the different components of its protocol. Relays use these numbers
+ * to advertise what versions of the protocols they can support, and clients
+ * use them to find what they can ask a given relay to do.  Authorities vote
+ * on the supported protocol versions for each relay, and also vote on
+ * which protocols you should have to support in order to be on the Tor
+ * network. All Tor instances use these required/recommended protocol versions
+ * to tell what level of support for recent protocols each relay has, and
+ * to decide whether they should be running given their current protocols.
+ *
+ * The main advantage of these protocol version numbers over using Tor
+ * version numbers is that they allow different implementations of the Tor
+ * protocols to develop independently, without having to claim compatibility
+ * with specific versions of Tor.
+ **/
 
 #define PROTOVER_PRIVATE
 
@@ -699,6 +720,9 @@ protover_compute_for_old_tor(const char *version)
   }
 }
 
+/**
+ * Release all storage held by static fields in protover.c
+ */
 void
 protover_free_all(void)
 {

+ 7 - 0
src/or/protover.h

@@ -1,3 +1,10 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file protover.h
+ * \brief Headers and type declarations for protover.c
+ **/
 
 #ifndef TOR_PROTOVER_H
 #define TOR_PROTOVER_H

+ 6 - 0
src/or/reasons.c

@@ -6,6 +6,12 @@
  * \file reasons.c
  * \brief Convert circuit, stream, and orconn error reasons to and/or from
  * strings and errno values.
+ *
+ * This module is just a bunch of functions full of case statements that
+ * convert from one representation of our error codes to another. These are
+ * mainly used in generating log messages, in sending messages to the
+ * controller in control.c, and in converting errors from one protocol layer
+ * to another.
  **/
 
 #include "or.h"

+ 68 - 2
src/or/rephist.c

@@ -4,10 +4,74 @@
 
 /**
  * \file rephist.c
- * \brief Basic history and "reputation" functionality to remember
+ * \brief Basic history and performance-tracking functionality.
+ *
+ * Basic history and performance-tracking functionality to remember
  *    which servers have worked in the past, how much bandwidth we've
  *    been using, which ports we tend to want, and so on; further,
  *    exit port statistics, cell statistics, and connection statistics.
+ *
+ * The history and information tracked in this module could sensibly be
+ * divided into several categories:
+ *
+ * <ul><li>Statistics used by authorities to remember the uptime and
+ * stability information about various relays, including "uptime",
+ * "weighted fractional uptime" and "mean time between failures".
+ *
+ * <li>Bandwidth usage history, used by relays to self-report how much
+ * bandwidth they've used for different purposes over the last day or so,
+ * in order to generate the {dirreq-,}{read,write}-history lines
+ * that they publish.
+ *
+ * <li>Predicted ports, used by clients to remember how long it's been
+ * since they opened an exit connection to each given target
+ * port. Clients use this information in order to try to keep circuits
+ * open to exit nodes that can connect to the ports that they care
+ * about.  (The predicted ports mechanism also handles predicted circuit
+ * usage that _isn't_ port-specific, such as resolves, internal circuits,
+ * and so on.)
+ *
+ * <li>Public key operation counters, for tracking how many times we've
+ * done each public key operation.  (This is unmaintained and we should
+ * remove it.)
+ *
+ * <li>Exit statistics by port, used by exits to keep track of the
+ * number of streams and bytes they've served at each exit port, so they
+ * can generate their exit-kibibytes-{read,written} and
+ * exit-streams-opened statistics.
+ *
+ * <li>Circuit stats, used by relay instances to track circuit
+ * queue fullness and delay over time, and generate cell-processed-cells,
+ * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile
+ * statistics.
+ *
+ * <li>Descriptor serving statistics, used by directory caches to track
+ * how many descriptors they've served.
+ *
+ * <li>Connection statistics, used by relays to track one-way and
+ * bidirectional connections.
+ *
+ * <li>Onion handshake statistics, used by relays to count how many
+ * TAP and ntor handshakes they've handled.
+ *
+ * <li>Hidden service statistics, used by relays to count rendezvous
+ * traffic and HSDir-stored descriptors.
+ *
+ * <li>Link protocol statistics, used by relays to count how many times
+ * each link protocol has been used.
+ *
+ * </ul>
+ *
+ * The entry points for this module are scattered throughout the
+ * codebase.  Sending data, receiving data, connecting to a relay,
+ * losing a connection to a relay, and so on can all trigger a change in
+ * our current stats.  Relays also invoke this module in order to
+ * extract their statistics when building routerinfo and extrainfo
+ * objects in router.c.
+ *
+ * TODO: This module should be broken up.
+ *
+ * (The "rephist" name originally stood for "reputation and history".)
  **/
 
 #include "or.h"
@@ -2650,7 +2714,9 @@ rep_hist_desc_stats_write(time_t now)
   return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
 }
 
-/* DOCDOC rep_hist_note_desc_served */
+/** Called to note that we've served a given descriptor (by
+ * digest). Increments the count of descriptors served, and the number
+ * of times we've served this descriptor. */
 void
 rep_hist_note_desc_served(const char * desc)
 {

+ 12 - 0
src/or/replaycache.c

@@ -5,6 +5,18 @@
  * \file replaycache.c
  *
  * \brief Self-scrubbing replay cache for rendservice.c
+ *
+ * To prevent replay attacks, hidden services need to recognize INTRODUCE2
+ * cells that they've already seen, and drop them.  If they didn't, then
+ * sending the same INTRODUCE2 cell over and over would force the hidden
+ * service to make a huge number of circuits to the same rendezvous
+ * point, aiding traffic analysis.
+ *
+ * (It's not that simple, actually.  We only check for replays in the
+ * RSA-encrypted portion of the handshake, since the rest of the handshake is
+ * malleable.)
+ *
+ * This module is used from rendservice.c.
  */
 
 #define REPLAYCACHE_PRIVATE

+ 85 - 1
src/or/routerlist.c

@@ -9,6 +9,85 @@
  * \brief Code to
  * maintain and access the global list of routerinfos for known
  * servers.
+ *
+ * A "routerinfo_t" object represents a single self-signed router
+ * descriptor, as generated by a Tor relay in order to tell the rest of
+ * the world about its keys, address, and capabilities.  An
+ * "extrainfo_t" object represents an adjunct "extra-info" object,
+ * certified by a corresponding router descriptor, reporting more
+ * information about the relay that nearly all users will not need.
+ *
+ * Most users will not use router descriptors for most relays.  Instead,
+ * they use the information in microdescriptors and in the consensus
+ * networkstatus.
+ *
+ * Right now, routerinfo_t objects are used in these ways:
+ *  <ul>
+ *   <li>By clients, in order to learn about bridge keys and capabilities.
+ *     (Bridges aren't listed in the consensus networkstatus, so they
+ *     can't have microdescriptors.)
+ *   <li>By relays, since relays want more information about other relays
+ *     than they can learn from microdescriptors. (TODO: Is this still true?)
+ *   <li>By authorities, which receive them and use them to generate the
+ *     consensus and the microdescriptors.
+ *   <li>By all directory caches, which download them in case somebody
+ *     else wants them.
+ *  </ul>
+ *
+ * Routerinfos are mostly created by parsing them from a string, in
+ * routerparse.c. We store them to disk on receiving them, and
+ * periodically discard the ones we don't need. On restarting, we
+ * re-read them from disk. (This also applies to extrainfo documents, if
+ * we are configured to fetch them.)
+ *
+ * In order to keep our list of routerinfos up-to-date, we periodically
+ * check whether there are any listed in the latest consensus (or in the
+ * votes from other authorities, if we are an authority) that we don't
+ * have.  (This also applies to extrainfo documents, if we are
+ * configured to fetch them.)
+ *
+ * Almost nothing in Tor should use a routerinfo_t to refer directly to
+ * a relay; instead, almost everything should use node_t (implemented in
+ * nodelist.c), which provides a common interface to routerinfo_t,
+ * routerstatus_t, and microdescriptor_t.
+ *
+ * <br>
+ *
+ * This module also has some of the functions used for choosing random
+ * nodes according to different rules and weights.  Historically, they
+ * were all in this module.  Now, they are spread across this module,
+ * nodelist.c, and networkstatus.c.  (TODO: Fix that.)
+ *
+ * <br>
+ *
+ * (For historical reasons) this module also contains code for handling
+ * the list of fallback directories, the list of directory authorities,
+ * and the list of authority certificates.
+ *
+ * For the directory authorities, we have a list containing the public
+ * identity key, and contact points, for each authority.  The
+ * authorities receive descriptors from relays, and publish consensuses,
+ * descriptors, and microdescriptors.  This list is pre-configured.
+ *
+ * Fallback directories are well-known, stable, but untrusted directory
+ * caches that clients which have not yet bootstrapped can use to get
+ * their first networkstatus consensus, in order to find out where the
+ * Tor network really is.  This list is pre-configured in
+ * fallback_dirs.inc.  Every authority also serves as a fallback.
+ *
+ * Both fallback directories and directory authorities are
+ * represented by a dir_server_t.
+ *
+ * Authority certificates are signed with authority identity keys; they
+ * are used to authenticate shorter-term authority signing keys. We
+ * fetch them when we find a consensus or a vote that has been signed
+ * with a signing key we don't recognize.  We cache them on disk and
+ * load them on startup.  Authority operators generate them with the
+ * "tor-gencert" utility.
+ *
+ * TODO: Authority certificates should be a separate module.
+ *
+ * TODO: dir_server_t stuff should be in a separate module.
  **/
 
 #define ROUTERLIST_PRIVATE
@@ -46,6 +125,9 @@
 
 /****************************************************************************/
 
+/* Typed wrappers for different digestmap types; used to avoid type
+ * confusion. */
+
 DECLARE_TYPED_DIGESTMAP_FNS(sdmap_, digest_sd_map_t, signed_descriptor_t)
 DECLARE_TYPED_DIGESTMAP_FNS(rimap_, digest_ri_map_t, routerinfo_t)
 DECLARE_TYPED_DIGESTMAP_FNS(eimap_, digest_ei_map_t, extrainfo_t)
@@ -800,7 +882,9 @@ static const char *BAD_SIGNING_KEYS[] = {
   NULL,
 };
 
-/* DOCDOC */
+/** Return true iff <b>cert</b> authenticates some authority signing key
+ * which, because of the old openssl heartbleed vulnerability, should
+ * never be trusted. */
 int
 authority_cert_is_blacklisted(const authority_cert_t *cert)
 {

+ 55 - 8
src/or/routerparse.c

@@ -6,7 +6,51 @@
 
 /**
  * \file routerparse.c
- * \brief Code to parse and validate router descriptors and directories.
+ * \brief Code to parse and validate router descriptors, consensus directories,
+ *   and similar objects.
+ *
+ * The objects parsed by this module use a common text-based metaformat,
+ * documented in dir-spec.txt in torspec.git.  This module is itself divided
+ * into two major kinds of function: code to handle the metaformat, and code
+ * to convert from particular instances of the metaformat into the
+ * objects that Tor uses.
+ *
+ * The generic parsing code works by calling a table-based tokenizer on the
+ * input string.  Each token corresponds to a single line with a token, plus
+ * optional arguments on that line, plus an optional base-64 encoded object
+ * after that line.  Each token has a definition in a table of token_rule_t
+ * entries that describes how many arguments it can take, whether it takes an
+ * object, how many times it may appear, whether it must appear first, and so
+ * on.
+ *
+ * The tokenizer function tokenize_string() converts its string input into a
+ * smartlist full of instances of directory_token_t, according to a provided
+ * table of token_rule_t.
+ *
+ * The generic parts of this module additionally include functions for
+ * finding the start and end of signed information inside a signed object, and
+ * computing the digest that will be signed.
+ *
+ * There are also functions for saving objects to disk that have caused
+ * parsing to fail.
+ *
+ * The specific parts of this module describe conversions between
+ * particular lists of directory_token_t and particular objects.  The
+ * kinds of objects that can be parsed here are:
+ *  <ul>
+ *  <li>router descriptors (managed from routerlist.c)
+ *  <li>extra-info documents (managed from routerlist.c)
+ *  <li>microdescriptors (managed from microdesc.c)
+ *  <li>vote and consensus networkstatus documents, and the routerstatus_t
+ *    objects that they comprise (managed from networkstatus.c)
+ *  <li>detached-signature objects used by authorities for gathering
+ *    signatures on the networkstatus consensus (managed from dirvote.c)
+ *  <li>authority key certificates (managed from routerlist.c)
+ *  <li>hidden service descriptors (managed from rendcommon.c and rendcache.c)
+ * </ul>
+ *
+ * For no terribly good reason, the functions to <i>generate</i> signatures on
+ * the above directory objects are also in this module.
  **/
 
 #define ROUTERPARSE_PRIVATE
@@ -258,12 +302,14 @@ typedef struct token_rule_t {
   int is_annotation;
 } token_rule_t;
 
-/*
+/**
+ * @name macros for defining token rules
+ *
  * Helper macros to define token tables.  's' is a string, 't' is a
  * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an
  * object syntax.
- *
  */
+/**@{*/
 
 /** Appears to indicate the end of a table. */
 #define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 }
@@ -284,16 +330,17 @@ typedef struct token_rule_t {
 /** An annotation that must appear no more than once */
 #define A01(s,t,a,o)  { s, t, a, o, 0, 1, 0, 1 }
 
-/* Argument multiplicity: any number of arguments. */
+/** Argument multiplicity: any number of arguments. */
 #define ARGS        0,INT_MAX,0
-/* Argument multiplicity: no arguments. */
+/** Argument multiplicity: no arguments. */
 #define NO_ARGS     0,0,0
-/* Argument multiplicity: concatenate all arguments. */
+/** Argument multiplicity: concatenate all arguments. */
 #define CONCAT_ARGS 1,1,1
-/* Argument multiplicity: at least <b>n</b> arguments. */
+/** Argument multiplicity: at least <b>n</b> arguments. */
 #define GE(n)       n,INT_MAX,0
-/* Argument multiplicity: exactly <b>n</b> arguments. */
+/** Argument multiplicity: exactly <b>n</b> arguments. */
 #define EQ(n)       n,n,0
+/**@}*/
 
 /** List of tokens recognized in router descriptors */
 static token_rule_t routerdesc_token_table[] = {

+ 14 - 0
src/or/routerset.c

@@ -9,6 +9,20 @@
  *
  * \brief Functions and structures to handle set-type selection of routers
  *  by name, ID, address, etc.
+ *
+ * This module implements the routerset_t data structure, whose purpose
+ * is to specify a set of relays based on a list of their identities or
+ * properties.  Routersets can restrict relays by IP address mask,
+ * identity fingerprint, country codes, and nicknames (deprecated).
+ *
+ * Routersets are typically used for user-specified restrictions, and
+ * are created by invoking routerset_new and routerset_parse from
+ * config.c and confparse.c.  To use a routerset, invoke one of the
+ * routerset_contains_...() functions, or use
+ * routerset_get_all_nodes() / routerset_subtract_nodes() to
+ * manipulate a smartlist of node_t pointers.
+ *
+ * Country-code restrictions are implemented in geoip.c.
  */
 
 #define ROUTERSET_PRIVATE

+ 17 - 0
src/or/statefile.c

@@ -9,6 +9,23 @@
  *
  * \brief Handles parsing and encoding the persistent 'state' file that carries
  *  miscellaneous persistent state between Tor invocations.
+ *
+ * This 'state' file is a typed key-value store that allows multiple
+ * entries for the same key.  It follows the same metaformat as described
+ * in confparse.c, and uses the same code to read and write itself.
+ *
+ * The state file is most suitable for small values that don't change too
+ * frequently.  For values that become very large, we typically use a separate
+ * file -- for example, see how we handle microdescriptors, by storing them in
+ * a separate file with a journal.
+ *
+ * The current state is accessed via get_or_state(), which returns a singleton
+ * or_state_t object.  Functions that change it should call
+ * or_state_mark_dirty() to ensure that it will get written to disk.
+ *
+ * The or_state_save() function additionally calls various functions
+ * throughout Tor that might want to flush more state to the disk,
+ * including some in rephist.c, entrynodes.c, circuitstats.c, hibernate.c.
  */
 
 #define STATEFILE_PRIVATE

+ 7 - 1
src/or/status.c

@@ -3,7 +3,13 @@
 
 /**
  * \file status.c
- * \brief Keep status information and log the heartbeat messages.
+ * \brief Collect status information and log heartbeat messages.
+ *
+ * This module is responsible for implementing the heartbeat log messages,
+ * which periodically inform users and operators about basic facts to
+ * do with their Tor instance.  The log_heartbeat() function, invoked from
+ * main.c, is the principal entry point.  It collects data from elsewhere
+ * in Tor, and logs it in a human-readable format.
  **/
 
 #define STATUS_PRIVATE

+ 4 - 2
src/or/tor_main.c

@@ -17,8 +17,10 @@ const char tor_git_revision[] =
 
 /**
  * \file tor_main.c
- * \brief Stub module containing a main() function. Allows unit
- * test binary to link against main.c.
+ * \brief Stub module containing a main() function.
+ *
+ * We keep the main function in a separate module so that the unit
+ * tests, which have their own main()s, can link against main.c.
  **/
 
 int tor_main(int argc, char *argv[]);