Bläddra i källkod

Make Tor use Niels Provos's libevent instead of it's current
poll-but-sometimes-select mess. This will let us use faster async cores
(like epoll, kpoll, and /dev/poll), and hopefully work better on Windows
too.

There are some fairly nasty changes to main.c here; this will almost
certainly break something. But hey, that's what alphas are for.


svn:r3341

Nick Mathewson 19 år sedan
förälder
incheckning
324b192f68
14 ändrade filer med 355 tillägg och 403 borttagningar
  1. 4 1
      configure.in
  2. 16 2
      doc/TODO
  3. 2 2
      src/common/Makefile.am
  4. 0 1
      src/common/compat.c
  5. 4 0
      src/common/compat.h
  6. 0 108
      src/common/fakepoll.c
  7. 0 73
      src/common/fakepoll.h
  8. 0 3
      src/or/config.c
  9. 25 0
      src/or/connection.c
  10. 3 3
      src/or/connection_edge.c
  11. 2 2
      src/or/connection_or.c
  12. 2 2
      src/or/directory.c
  13. 288 204
      src/or/main.c
  14. 9 2
      src/or/or.h

+ 4 - 1
configure.in

@@ -32,6 +32,7 @@ AC_ARG_WITH(ssl-dir,
 
 AC_SEARCH_LIBS(socket, [socket])
 AC_SEARCH_LIBS(gethostbyname, [nsl])
+AC_SEARCH_LIBS(event_loop, [event], , AC_MSG_ERROR(Libevent library not found. Tor requires libevent to build.))
 
 saved_LIBS="$LIBS"
 saved_LDFLAGS="$LDFLAGS"
@@ -139,7 +140,9 @@ AC_SYS_LARGEFILE
 
 dnl The warning message here is no longer strictly accurate.
 
-AC_CHECK_HEADERS(unistd.h string.h signal.h netdb.h ctype.h poll.h sys/stat.h sys/poll.h sys/types.h fcntl.h sys/fcntl.h sys/ioctl.h sys/socket.h sys/time.h netinet/in.h arpa/inet.h errno.h assert.h time.h pwd.h grp.h zlib.h, , AC_MSG_WARN(some headers were not found, compilation may fail))
+AC_CHECK_HEADERS(unistd.h string.h signal.h netdb.h ctype.h sys/stat.h sys/types.h fcntl.h sys/fcntl.h sys/ioctl.h sys/socket.h sys/time.h netinet/in.h arpa/inet.h errno.h assert.h time.h pwd.h grp.h zlib.h, , AC_MSG_WARN(some headers were not found, compilation may fail))
+
+AC_CHECK_HEADERS(event.h, , AC_MSG_ERROR(Libevent header (event.h) not found. Tor requires libevent to build.))
 
 dnl These headers are not essential
 

+ 16 - 2
doc/TODO

@@ -70,7 +70,7 @@ Tier one:
 
    - Windows
 N    - Make millisecond accuracy work on win32
-     - Switch to WSA*Event code as a better poll replacement.  Or maybe just
+     X Switch to WSA*Event code as a better poll replacement.  Or maybe just
        do libevent?
 
    - Code cleanup
@@ -101,10 +101,24 @@ Tier two:
      - Limit number of circuits that we preemptively generate based on past
        behavior; use same limits in circuit_expire_old_circuits().
      - Write limiting; configurable token buckets.
-     - Switch to libevent?  Evaluate it first.
      - Make it harder to circumvent bandwidth caps: look at number of bytes
        sent across sockets, not number sent inside TLS stream.
 
+     . Switch to libevent
+       o Evaluate libevent
+       o Convert socket handling
+       o Convert signal handling
+       o Convert timers
+       o Update configure.in
+       o Remove fakepoll
+       - Hold-open-until-flushed now works by accident; it should work by
+         design.
+       - The logic for reading from TLS sockets is likely to overrun the
+         bandwidth buckets under heavy load.  (Really, the logic was
+         never right in the first place.)  Also, we should audit all users
+         of get_pending_bytes().
+       - Make sure it works on more platforms.
+       - Find a way to make sure we have libevent 1.0 or later.
 
    - QOI
      - Let more config options (e.g. ORPort) change dynamically.

+ 2 - 2
src/common/Makefile.am

@@ -3,7 +3,7 @@ noinst_LIBRARIES = libor.a libor-crypto.a
 
 #CFLAGS  = -Wall -Wpointer-arith -O2
 
-libor_a_SOURCES = log.c fakepoll.c util.c compat.c container.c
+libor_a_SOURCES = log.c util.c compat.c container.c
 libor_crypto_a_SOURCES = crypto.c aes.c tortls.c torgzip.c
 
-noinst_HEADERS = log.h crypto.h fakepoll.h test.h util.h compat.h aes.h torint.h tortls.h strlcpy.c strlcat.c torgzip.h container.h
+noinst_HEADERS = log.h crypto.h test.h util.h compat.h aes.h torint.h tortls.h strlcpy.c strlcat.c torgzip.h container.h

+ 0 - 1
src/common/compat.c

@@ -8,7 +8,6 @@ const char compat_c_id[] = "$Id$";
 #define _GNU_SOURCE
 
 #include "orconfig.h"
-#include "fakepoll.h"
 #include "compat.h"
 
 #ifdef MS_WINDOWS

+ 4 - 0
src/common/compat.h

@@ -115,6 +115,10 @@ int replace_file(const char *from, const char *to);
 #define tor_close_socket(s) close(s)
 #endif
 
+/* Now that we use libevent, all real sockets are safe for polling ... or
+ * if they aren't, libevent will help us. */
+#define SOCKET_IS_POLLABLE(fd) ((fd)>=0)
+
 struct in_addr;
 int tor_inet_aton(const char *cp, struct in_addr *addr);
 int tor_lookup_hostname(const char *name, uint32_t *addr);

+ 0 - 108
src/common/fakepoll.c

@@ -1,108 +0,0 @@
-/* Copyright 2002,2003 Nick Mathewson, Roger Dingledine */
-/* See LICENSE for licensing information */
-/* $Id$ */
-const char fakepoll_c_id[] = "$Id$";
-
-/**
- * \file fakepoll.c
- *
- * \brief On systems where poll() doesn't exist, fake it with select().
- **/
-
-#include "orconfig.h"
-#include "fakepoll.h"
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_STRING_H
-#include <string.h>
-#endif
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-
-#include <assert.h>
-#include <stdlib.h>
-#include "util.h"
-#include "log.h"
-
-#ifndef USE_FAKE_POLL
-int
-tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout)
-{
-        unsigned int i;
-        for (i=0;i<nfds;++i) {
-                tor_assert(ufds[i].fd >= 0);
-        }
-        return poll(ufds,nfds,timeout);
-}
-#else
-int
-tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout)
-{
-        unsigned int idx;
-        int maxfd, fd;
-        int r;
-#ifdef MS_WINDOWS
-        int any_fds_set = 0;
-#endif
-        fd_set readfds, writefds, exceptfds;
-#ifdef USING_FAKE_TIMEVAL
-#undef timeval
-#undef tv_sec
-#undef tv_usec
-#endif
-        struct timeval _timeout;
-        _timeout.tv_sec = timeout/1000;
-        _timeout.tv_usec = (timeout%1000)*1000;
-        FD_ZERO(&readfds);
-        FD_ZERO(&writefds);
-        FD_ZERO(&exceptfds);
-
-        maxfd = -1;
-        for (idx = 0; idx < nfds; ++idx) {
-                ufds[idx].revents = 0;
-                fd = ufds[idx].fd;
-                tor_assert(SOCKET_IS_POLLABLE(fd));
-                if (fd > maxfd) {
-                  maxfd = fd;
-#ifdef MS_WINDOWS
-                  any_fds_set = 1;
-#endif
-                }
-                if (ufds[idx].events & POLLIN)
-                        FD_SET(fd, &readfds);
-                if (ufds[idx].events & POLLOUT)
-                        FD_SET(fd, &writefds);
-                FD_SET(fd, &exceptfds);
-        }
-#ifdef MS_WINDOWS
-        if (!any_fds_set) {
-                Sleep(timeout);
-                return 0;
-        }
-#endif
-        r = select(maxfd+1, &readfds, &writefds, &exceptfds,
-                   timeout == -1 ? NULL : &_timeout);
-        if (r <= 0)
-                return r;
-        r = 0;
-        for (idx = 0; idx < nfds; ++idx) {
-                fd = ufds[idx].fd;
-                if (FD_ISSET(fd, &readfds))
-                        ufds[idx].revents |= POLLIN;
-                if (FD_ISSET(fd, &writefds))
-                        ufds[idx].revents |= POLLOUT;
-                if (FD_ISSET(fd, &exceptfds))
-                        ufds[idx].revents |= POLLERR;
-                if (ufds[idx].revents)
-                        ++r;
-        }
-        return r;
-}
-#endif
-

+ 0 - 73
src/common/fakepoll.h

@@ -1,73 +0,0 @@
-/* Copyright 2002,2003 Nick Mathewson, Roger Dingledine. */
-/* See LICENSE for licensing information */
-/* $Id$ */
-
-#ifndef __FAKEPOLL_H
-#define __FAKEPOLL_H
-#define FAKEPOLL_H_ID "$Id$"
-
-/**
- * \file fakepoll.h
- * \brief Headers for fakepoll.c
- */
-
-#include "orconfig.h"
-
-#define POLL_NO_WARN
-
-#if defined(HAVE_POLL_H)
-#include <poll.h>
-#elif defined(HAVE_SYS_POLL_H)
-#include <sys/poll.h>
-#endif
-
-/* If _POLL_EMUL_H_ is defined, then poll is just a just a thin wrapper around
- * select.  On Mac OS 10.3, this wrapper is kinda flaky, and we should
- * use our own.
- */
-#if !(defined(HAVE_POLL_H)||defined(HAVE_SYS_POLL_H))&&!defined(_POLL_EMUL_H_)
-#define USE_FAKE_POLL
-#endif
-
-#if defined USE_FAKE_POLL && !defined(_POLL_EMUL_H_)
-struct pollfd {
-  int fd;
-  short events;
-  short revents;
-};
-
-#define POLLIN   0x0001
-#define POLLPRI  0x0002
-#define POLLOUT  0x0004
-#define POLLERR  0x0008
-#define POLLHUP  0x0010
-#define POLLNVAL 0x0020
-#endif
-
-#ifdef MS_WINDOWS
-#define MAXCONNECTIONS 10000 /* XXXX copied from or.h */
-/* This trick makes winsock resize fd_set, which defaults to the insanely low
- * 64. */
-#define FD_SETSIZE MAXCONNECTIONS
-/* XXXX But Windows FD_SET and FD_CLR are tremendously ugly, and linear in
- *   the total number of sockets set! Perhaps we should eventually use
- *   WSAEventSelect and WSAWaitForMultipleEvents instead of select? */
-#endif
-
-#if defined(MS_WINDOWS) || ! defined(USE_FAKE_POLL)
-/* If we're using poll, we can poll as many sockets as we want.
- * If we're on Windows, having too many sockets is harmless, since
- * select stupidly uses an array of sockets rather than a bitfield. */
-#define SOCKET_IS_POLLABLE(fd) ((fd) >= 0)
-#else
-/* If we're using a real Posix select, then in order to be pollable, a socket
- * must
- *   a) be valid (>= 0)
- *   b) be < FD_SETSIZE.
- */
-#define SOCKET_IS_POLLABLE(fd) ((fd) >= 0 && (fd) < FD_SETSIZE)
-#endif
-
-int tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout);
-
-#endif

+ 0 - 3
src/or/config.c

@@ -2435,7 +2435,6 @@ print_cvs_version(void)
   extern const char compat_c_id[];
   extern const char container_c_id[];
   extern const char crypto_c_id[];
-  extern const char fakepoll_c_id[];
   extern const char log_c_id[];
   extern const char torgzip_c_id[];
   extern const char tortls_c_id[];
@@ -2472,7 +2471,6 @@ print_cvs_version(void)
   puts(COMPAT_H_ID);
   puts(CONTAINER_H_ID);
   puts(CRYPTO_H_ID);
-  puts(FAKEPOLL_H_ID);
   puts(LOG_H_ID);
   puts(TORGZIP_H_ID);
   puts(TORINT_H_ID);
@@ -2482,7 +2480,6 @@ print_cvs_version(void)
   puts(compat_c_id);
   puts(container_c_id);
   puts(crypto_c_id);
-  puts(fakepoll_c_id);
   puts(log_c_id);
   puts(torgzip_c_id);
   puts(tortls_c_id);

+ 25 - 0
src/or/connection.c

@@ -167,6 +167,14 @@ void connection_free(connection_t *conn) {
     log_fn(LOG_INFO,"closing fd %d.",conn->s);
     tor_close_socket(conn->s);
   }
+  if (conn->read_event) {
+    event_del(conn->read_event);
+    tor_free(conn->read_event);
+  }
+  if (conn->write_event) {
+    event_del(conn->write_event);
+    tor_free(conn->write_event);
+  }
   memset(conn, 0xAA, sizeof(connection_t)); /* poison memory */
   tor_free(conn);
 }
@@ -300,6 +308,7 @@ _connection_mark_for_close(connection_t *conn)
   }
 
   conn->marked_for_close = 1;
+  add_connection_to_closeable_list(conn);
 
   /* in case we're going to be held-open-til-flushed, reset
    * the number of seconds since last successful write, so
@@ -904,6 +913,7 @@ static int connection_read_to_buf(connection_t *conn, int *max_to_read) {
   }
 
   if (connection_speaks_cells(conn) && conn->state != OR_CONN_STATE_CONNECTING) {
+    int pending;
     if (conn->state == OR_CONN_STATE_HANDSHAKING) {
       /* continue handshaking even if global token bucket is empty */
       return connection_tls_continue_handshake(conn);
@@ -931,7 +941,22 @@ static int connection_read_to_buf(connection_t *conn, int *max_to_read) {
       case TOR_TLS_DONE: /* no data read, so nothing to process */
         result = 0;
         break; /* so we call bucket_decrement below */
+      default:
+        break;
+    }
+    pending = tor_tls_get_pending_bytes(conn->tls);
+    if (pending) {
+      /* XXXX If we have any pending bytes, read them now.  This *can*
+       * take us over our read alotment, but really we shouldn't be
+       * believing that SSL bytes are the same as TCP bytes anyway. */
+      int r2 = read_to_buf_tls(conn->tls, pending, conn->inbuf);
+      if (r2<0) {
+        log_fn(LOG_WARN, "Bug: apparently, reading pending bytes can fail.");
+      } else {
+        result += r2;
+      }
     }
+
   } else {
     result = read_to_buf(conn->s, at_most, conn->inbuf,
                          &conn->inbuf_reached_eof);

+ 3 - 3
src/or/connection_edge.c

@@ -203,7 +203,7 @@ int connection_edge_finished_connecting(connection_t *conn)
          conn->address,conn->port);
 
   conn->state = EXIT_CONN_STATE_OPEN;
-  connection_watch_events(conn, POLLIN); /* stop writing, continue reading */
+  connection_watch_events(conn, EV_READ); /* stop writing, continue reading */
   if (connection_wants_to_flush(conn)) /* in case there are any queued relay cells */
     connection_start_writing(conn);
   /* deliver a 'connected' relay cell back through the circuit. */
@@ -949,7 +949,7 @@ connection_exit_connect(connection_t *conn) {
     case 0:
       conn->state = EXIT_CONN_STATE_CONNECTING;
 
-      connection_watch_events(conn, POLLOUT | POLLIN | POLLERR);
+      connection_watch_events(conn, EV_WRITE | EV_READ);
       /* writable indicates finish, readable indicates broken link,
          error indicates broken link in windowsland. */
       return;
@@ -961,7 +961,7 @@ connection_exit_connect(connection_t *conn) {
     log_fn(LOG_WARN,"Bug: newly connected conn had data waiting!");
 //    connection_start_writing(conn);
   }
-  connection_watch_events(conn, POLLIN);
+  connection_watch_events(conn, EV_READ);
 
   /* also, deliver a 'connected' cell back through the circuit. */
   if (connection_edge_is_rendezvous_stream(conn)) { /* rendezvous stream */

+ 2 - 2
src/or/connection_or.c

@@ -238,7 +238,7 @@ connection_t *connection_or_connect(uint32_t addr, uint16_t port,
       connection_free(conn);
       return NULL;
     case 0:
-      connection_watch_events(conn, POLLIN | POLLOUT | POLLERR);
+      connection_watch_events(conn, EV_READ | EV_WRITE);
       /* writable indicates finish, readable indicates broken link,
          error indicates broken link on windows */
       return conn;
@@ -342,7 +342,7 @@ connection_tls_finish_handshake(connection_t *conn) {
   or_options_t *options = get_options();
 
   conn->state = OR_CONN_STATE_OPEN;
-  connection_watch_events(conn, POLLIN);
+  connection_watch_events(conn, EV_READ);
   log_fn(LOG_DEBUG,"tls handshake done. verifying.");
   if (! tor_tls_peer_has_cert(conn->tls)) { /* It's an old OP. */
     if (server_mode(options)) { /* I'm an OR; good. */

+ 2 - 2
src/or/directory.c

@@ -321,7 +321,7 @@ directory_initiate_command(const char *address, uint32_t addr,
         /* queue the command on the outbuf */
         directory_send_command(conn, platform, purpose, resource,
                                payload, payload_len);
-        connection_watch_events(conn, POLLIN | POLLOUT | POLLERR);
+        connection_watch_events(conn, EV_READ | EV_WRITE);
         /* writable indicates finish, readable indicates broken link,
            error indicates broken link in windowsland. */
     }
@@ -342,7 +342,7 @@ directory_initiate_command(const char *address, uint32_t addr,
     /* queue the command on the outbuf */
     directory_send_command(conn, platform, purpose, resource,
                            payload, payload_len);
-    connection_watch_events(conn, POLLIN | POLLOUT | POLLERR);
+    connection_watch_events(conn, EV_READ | EV_WRITE);
   }
 }
 

+ 288 - 204
src/or/main.c

@@ -12,9 +12,31 @@ const char main_c_id[] = "$Id$";
 
 #include "or.h"
 
+/* These signals are defined to help control_singal_act work. */
+#ifndef SIGHUP
+#define SIGHUP 1
+#endif
+#ifndef SIGINT
+#define SIGINT 2
+#endif
+#ifndef SIGUSR1
+#define SIGUSR1 10
+#endif
+#ifndef SIGUSR2
+#define SIGUSR2 12
+#endif
+#ifndef SIGTERM
+#define SIGTERM 15
+#endif
+
 /********* PROTOTYPES **********/
 
 static void dumpstats(int severity); /* log stats */
+static void conn_read_callback(int fd, short event, void *_conn);
+static void conn_write_callback(int fd, short event, void *_conn);
+static void signal_callback(int fd, short events, void *arg);
+static void second_elapsed_callback(int fd, short event, void *args);
+static int conn_close_if_marked(int i);
 
 /********* START VARIABLES **********/
 
@@ -45,22 +67,10 @@ static time_t time_to_fetch_running_routers = 0;
  * poll_array in the same position.  The first nfds elements are valid. */
 static connection_t *connection_array[MAXCONNECTIONS] =
         { NULL };
-
-/** Array of pollfd objects for calls to poll(). */
-static struct pollfd poll_array[MAXCONNECTIONS];
+static smartlist_t *closeable_connection_lst = NULL;
 
 static int nfds=0; /**< Number of connections currently active. */
 
-#ifndef MS_WINDOWS /* do signal stuff only on unix */
-static int please_dumpstats=0; /**< Whether we should dump stats during the loop. */
-static int please_debug=0; /**< Whether we should switch all logs to -l debug. */
-static int please_reset=0; /**< Whether we just got a sighup. */
-static int please_reap_children=0; /**< Whether we should waitpid for exited children. */
-static int please_sigpipe=0; /**< Whether we've caught a sigpipe lately. */
-static int please_shutdown=0; /**< Whether we should slowly shut down Tor. */
-static int please_die=0; /**< Whether we should immediately shut down Tor. */
-#endif /* signal stuff */
-
 /** We set this to 1 when we've fetched a dir, to know whether to complain
  * yet about unrecognized nicknames in entrynodes, exitnodes, etc.
  * Also, we don't try building circuits unless this is 1. */
@@ -110,11 +120,12 @@ int connection_add(connection_t *conn) {
   conn->poll_index = nfds;
   connection_array[nfds] = conn;
 
-  poll_array[nfds].fd = conn->s;
-
-  /* zero these out here, because otherwise we'll inherit values from the previously freed one */
-  poll_array[nfds].events = 0;
-  poll_array[nfds].revents = 0;
+  conn->read_event = tor_malloc_zero(sizeof(struct event));
+  conn->write_event = tor_malloc_zero(sizeof(struct event));
+  event_set(conn->read_event, conn->s, EV_READ|EV_PERSIST,
+            conn_read_callback, conn);
+  event_set(conn->write_event, conn->s, EV_WRITE|EV_PERSIST,
+            conn_write_callback, conn);
 
   nfds++;
 
@@ -144,17 +155,33 @@ int connection_remove(connection_t *conn) {
     return 0;
   }
 
+  if (conn->read_event) {
+    event_del(conn->read_event);
+    tor_free(conn->read_event);
+  }
+  if (conn->write_event) {
+    event_del(conn->write_event);
+    tor_free(conn->write_event);
+  }
+
   /* replace this one with the one at the end */
   nfds--;
-  poll_array[current_index].fd = poll_array[nfds].fd;
-  poll_array[current_index].events = poll_array[nfds].events;
-  poll_array[current_index].revents = poll_array[nfds].revents;
   connection_array[current_index] = connection_array[nfds];
   connection_array[current_index]->poll_index = current_index;
 
   return 0;
 }
 
+/** DOCDOC **/
+void
+add_connection_to_closeable_list(connection_t *conn)
+{
+  tor_assert(!smartlist_isin(closeable_connection_lst, conn));
+  tor_assert(conn->marked_for_close);
+
+  smartlist_add(closeable_connection_lst, conn);
+}
+
 /** Return true iff conn is in the current poll array. */
 int connection_in_array(connection_t *conn) {
   int i;
@@ -175,67 +202,150 @@ void get_connection_array(connection_t ***array, int *n) {
 }
 
 /** Set the event mask on <b>conn</b> to <b>events</b>.  (The form of
-* the event mask is as for poll().)
+* the event mask is DOCDOC)
  */
 void connection_watch_events(connection_t *conn, short events) {
-
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  tor_assert(conn->poll_index < nfds);
+  tor_assert(conn->read_event);
+  tor_assert(conn->write_event);
+
+  if (events & EV_READ) {
+    event_add(conn->read_event, NULL);
+  } else {
+    event_del(conn->read_event);
+  }
 
-  poll_array[conn->poll_index].events = events;
+  if (events & EV_WRITE) {
+    event_add(conn->write_event, NULL);
+  } else {
+    event_del(conn->write_event);
+  }
 }
 
 /** Return true iff <b>conn</b> is listening for read events. */
 int connection_is_reading(connection_t *conn) {
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  return poll_array[conn->poll_index].events & POLLIN;
+
+  /* This isn't 100% documented, but it should work. */
+  return conn->read_event &&
+    (conn->read_event->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE));
 }
 
 /** Tell the main loop to stop notifying <b>conn</b> of any read events. */
 void connection_stop_reading(connection_t *conn) {
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  tor_assert(conn->poll_index < nfds);
+  tor_assert(conn->read_event);
 
   log(LOG_DEBUG,"connection_stop_reading() called.");
-  poll_array[conn->poll_index].events &= ~POLLIN;
+  event_del(conn->read_event);
 }
 
 /** Tell the main loop to start notifying <b>conn</b> of any read events. */
 void connection_start_reading(connection_t *conn) {
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  tor_assert(conn->poll_index < nfds);
-  poll_array[conn->poll_index].events |= POLLIN;
+  tor_assert(conn->read_event);
+
+  event_add(conn->read_event, NULL);
 }
 
 /** Return true iff <b>conn</b> is listening for write events. */
 int connection_is_writing(connection_t *conn) {
-  return poll_array[conn->poll_index].events & POLLOUT;
+  tor_assert(conn);
+
+  /* This isn't 100% documented, but it should work. */
+  return conn->write_event &&
+    (conn->write_event->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE));
 }
 
 /** Tell the main loop to stop notifying <b>conn</b> of any write events. */
 void connection_stop_writing(connection_t *conn) {
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  tor_assert(conn->poll_index < nfds);
-  poll_array[conn->poll_index].events &= ~POLLOUT;
+  tor_assert(conn->write_event);
+
+  event_del(conn->write_event);
 }
 
 /** Tell the main loop to start notifying <b>conn</b> of any write events. */
 void connection_start_writing(connection_t *conn) {
   tor_assert(conn);
-  tor_assert(conn->poll_index >= 0);
-  tor_assert(conn->poll_index < nfds);
-  poll_array[conn->poll_index].events |= POLLOUT;
+  tor_assert(conn->write_event);
+
+  event_add(conn->write_event, NULL);
 }
 
-/** Called when the connection at connection_array[i] has a read event,
- * or it has pending tls data waiting to be read: checks for validity,
- * catches numerous errors, and dispatches to connection_handle_read.
- */
+/** DOCDOC */
+static void
+close_closeable_connections(void)
+{
+  int i;
+  if (!smartlist_len(closeable_connection_lst))
+    return;
+
+  for (i = 0; i < smartlist_len(closeable_connection_lst); ) {
+    connection_t *conn = smartlist_get(closeable_connection_lst, i);
+    if (!conn_close_if_marked(conn->poll_index))
+      ++i;
+  }
+}
+
+/** DOCDOC */
+static void
+conn_read_callback(int fd, short event, void *_conn)
+{
+  connection_t *conn = _conn;
+  if (conn->marked_for_close)
+    return;
+
+  log_fn(LOG_DEBUG,"socket %d wants to read.",conn->s);
+
+  assert_connection_ok(conn, time(NULL));
+  assert_all_pending_dns_resolves_ok();
+
+  if (connection_handle_read(conn) < 0) {
+    if (!conn->marked_for_close) {
+#ifndef MS_WINDOWS
+      log_fn(LOG_WARN,"Bug: unhandled error on read for %s connection (fd %d); removing",
+             CONN_TYPE_TO_STRING(conn->type), conn->s);
+#endif
+      connection_mark_for_close(conn);
+    }
+  }
+  assert_connection_ok(conn, time(NULL));
+  assert_all_pending_dns_resolves_ok();
+
+  if (smartlist_len(closeable_connection_lst))
+    close_closeable_connections();
+}
+
+static void conn_write_callback(int fd, short events, void *_conn)
+{
+  connection_t *conn = _conn;
+
+  log_fn(LOG_DEBUG,"socket %d wants to write.",conn->s);
+  if (conn->marked_for_close)
+    return;
+
+  assert_connection_ok(conn, time(NULL));
+  assert_all_pending_dns_resolves_ok();
+
+  if (connection_handle_write(conn) < 0) {
+    if (!conn->marked_for_close) {
+      /* this connection is broken. remove it. */
+      log_fn(LOG_WARN,"Bug: unhandled error on write for %s connection (fd %d); removing",
+             CONN_TYPE_TO_STRING(conn->type), conn->s);
+      conn->has_sent_end = 1; /* otherwise we cry wolf about duplicate close */
+      /* XXX do we need a close-immediate here, so we don't try to flush? */
+      connection_mark_for_close(conn);
+    }
+  }
+  assert_connection_ok(conn, time(NULL));
+  assert_all_pending_dns_resolves_ok();
+
+  if (smartlist_len(closeable_connection_lst))
+    close_closeable_connections();
+}
+
+#if 0
 static void conn_read(int i) {
   connection_t *conn = connection_array[i];
 
@@ -336,6 +446,7 @@ static void conn_write(int i) {
   assert_connection_ok(conn, time(NULL));
   assert_all_pending_dns_resolves_ok();
 }
+#endif
 
 /** If the connection at connection_array[i] is marked for close, then:
  *    - If it has data that it wants to flush, try to flush it.
@@ -343,16 +454,16 @@ static void conn_write(int i) {
  *      true, then leave the connection open and return.
  *    - Otherwise, remove the connection from connection_array and from
  *      all other lists, close it, and free it.
- * If we remove the connection, then call conn_closed_if_marked at the new
- * connection at position i.
+ * Returns 1 if the connection was closed, 0 otherwise.
+ * DOCDOC closeable_list
  */
-static void conn_close_if_marked(int i) {
+static int conn_close_if_marked(int i) {
   connection_t *conn;
   int retval;
 
   conn = connection_array[i];
   if (!conn->marked_for_close)
-    return; /* nothing to see here, move along */
+    return 0; /* nothing to see here, move along */
   assert_connection_ok(conn, time(NULL));
   assert_all_pending_dns_resolves_ok();
 
@@ -378,7 +489,7 @@ static void conn_close_if_marked(int i) {
        conn->hold_open_until_flushed && connection_wants_to_flush(conn)) {
       log_fn(LOG_INFO,"Holding conn (fd %d) open for more flushing.",conn->s);
       /* XXX should we reset timestamp_lastwritten here? */
-      return;
+      return 0;
     }
     if (connection_wants_to_flush(conn)) {
       log_fn(LOG_NOTICE,"Conn (addr %s, fd %d, type %s, state %d) is being closed, but there are still %d bytes we can't write. (Marked at %s:%d)",
@@ -394,14 +505,12 @@ static void conn_close_if_marked(int i) {
   circuit_about_to_close_connection(conn);
   connection_about_to_close_connection(conn);
   connection_remove(conn);
+  smartlist_remove(closeable_connection_lst, conn);
   if (conn->type == CONN_TYPE_EXIT) {
     assert_connection_edge_not_dns_pending(conn);
   }
   connection_free(conn);
-  if (i<nfds) { /* we just replaced the one at i with a new one.
-                  process it too. */
-    conn_close_if_marked(i);
-  }
+  return 1;
 }
 
 /** We've just tried every dirserver we know about, and none of
@@ -731,61 +840,65 @@ static void run_scheduled_events(time_t now) {
    * because if we marked a conn for close and left its socket -1, then
    * we'll pass it to poll/select and bad things will happen.
    */
-  for (i=0;i<nfds;i++)
-    conn_close_if_marked(i);
+  close_closeable_connections();
 }
 
-/** Called every time we're about to call tor_poll.  Increments statistics,
- * and adjusts token buckets.  Returns the number of milliseconds to use for
- * the poll() timeout.
- */
-static int prepare_for_poll(void) {
-  static long current_second = 0; /* from previous calls to gettimeofday */
-  connection_t *conn;
+/** DOCDOC */
+static void second_elapsed_callback(int fd, short event, void *args)
+{
+  static struct event *timeout_event = NULL;
+  static struct timeval one_second;
+  static long current_second = 0;
   struct timeval now;
-  int i;
+  size_t bytes_written;
+  size_t bytes_read;
+  int seconds_elapsed;
+  if (!timeout_event) {
+    timeout_event = tor_malloc_zero(sizeof(struct event));
+    evtimer_set(timeout_event, second_elapsed_callback, NULL);
+    one_second.tv_sec = 1;
+    one_second.tv_usec = 0;
+  }
 
+  /* log_fn(LOG_NOTICE, "Tick."); */
   tor_gettimeofday(&now);
 
-  if (now.tv_sec > current_second) {
-    /* the second has rolled over. check more stuff. */
-    size_t bytes_written;
-    size_t bytes_read;
-    int seconds_elapsed;
-    bytes_written = stats_prev_global_write_bucket - global_write_bucket;
-    bytes_read = stats_prev_global_read_bucket - global_read_bucket;
-    seconds_elapsed = current_second ? (now.tv_sec - current_second) : 0;
-    stats_n_bytes_read += bytes_read;
-    stats_n_bytes_written += bytes_written;
-    if (accounting_is_enabled(get_options()))
-      accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed);
-    control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written);
-
-    connection_bucket_refill(&now);
-    stats_prev_global_read_bucket = global_read_bucket;
-    stats_prev_global_write_bucket = global_write_bucket;
-
-    /* if more than 10s have elapsed, probably the clock changed: doesn't count. */
-    if (seconds_elapsed < 10)
-      stats_n_seconds_working += seconds_elapsed;
-
-    assert_all_pending_dns_resolves_ok();
-    run_scheduled_events(now.tv_sec);
-    assert_all_pending_dns_resolves_ok();
-
-    current_second = now.tv_sec; /* remember which second it is, for next time */
-  }
+  /* the second has rolled over. check more stuff. */
+  bytes_written = stats_prev_global_write_bucket - global_write_bucket;
+  bytes_read = stats_prev_global_read_bucket - global_read_bucket;
+  seconds_elapsed = current_second ? (now.tv_sec - current_second) : 0;
+  stats_n_bytes_read += bytes_read;
+  stats_n_bytes_written += bytes_written;
+  if (accounting_is_enabled(get_options()))
+    accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed);
+  control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written);
+
+  connection_bucket_refill(&now);
+  stats_prev_global_read_bucket = global_read_bucket;
+  stats_prev_global_write_bucket = global_write_bucket;
+
+  /* if more than 10s have elapsed, probably the clock changed: doesn't count. */
+  if (seconds_elapsed < 10)
+    stats_n_seconds_working += seconds_elapsed;
+
+  assert_all_pending_dns_resolves_ok();
+  run_scheduled_events(now.tv_sec);
+  assert_all_pending_dns_resolves_ok();
+
+  current_second = now.tv_sec; /* remember which second it is, for next time */
 
+#if 0
   for (i=0;i<nfds;i++) {
     conn = connection_array[i];
     if (connection_has_pending_tls_data(conn) &&
         connection_is_reading(conn)) {
       log_fn(LOG_DEBUG,"sock %d has pending bytes.",conn->s);
-      return 0; /* has pending bytes to read; don't let poll wait. */
+      return; /* has pending bytes to read; don't let poll wait. */
     }
   }
+#endif
 
-  return (1000 - (now.tv_usec / 1000)); /* how many milliseconds til the next second? */
+  evtimer_add(timeout_event, &one_second);
 }
 
 /** Called when we get a SIGHUP: reload configuration files and keys,
@@ -834,9 +947,7 @@ static int do_hup(void) {
 
 /** Tor main loop. */
 static int do_main_loop(void) {
-  int i;
-  int timeout;
-  int poll_result;
+  int loop_result;
 
   /* load the private keys, if we're supposed to have them, and set up the
    * TLS context. */
@@ -867,6 +978,9 @@ static int do_main_loop(void) {
     cpu_init();
   }
 
+  /* set up once-a-second callback. */
+  second_elapsed_callback(0,0,NULL);
+
   for (;;) {
 #ifdef MS_WINDOWS_SERVICE /* Do service stuff only on windows. */
     if (service_status.dwCurrentState == SERVICE_STOP_PENDING) {
@@ -876,57 +990,12 @@ static int do_main_loop(void) {
       return 0;
     }
 #endif
-#ifndef MS_WINDOWS /* do signal stuff only on unix */
-    if (please_die) {
-      log(LOG_ERR,"Catching signal TERM, exiting cleanly.");
-      tor_cleanup();
-      exit(0);
-    }
-    if (please_shutdown) {
-      if (!server_mode(get_options())) { /* do it now */
-        log(LOG_NOTICE,"Interrupt: exiting cleanly.");
-        tor_cleanup();
-        exit(0);
-      }
-      hibernate_begin_shutdown();
-      please_shutdown = 0;
-    }
-    if (please_sigpipe) {
-      log(LOG_NOTICE,"Caught sigpipe. Ignoring.");
-      please_sigpipe = 0;
-    }
-    if (please_dumpstats) {
-      /* prefer to log it at INFO, but make sure we always see it */
-      dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO);
-      please_dumpstats = 0;
-    }
-    if (please_debug) {
-      switch_logs_debug();
-      log(LOG_NOTICE,"Caught USR2. Going to loglevel debug.");
-      please_debug = 0;
-    }
-    if (please_reset) {
-      if (do_hup() < 0) {
-        log_fn(LOG_WARN,"Restart failed (config error?). Exiting.");
-        tor_cleanup();
-        exit(1);
-      }
-      please_reset = 0;
-    }
-    if (please_reap_children) {
-      while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more zombies */
-      please_reap_children = 0;
-    }
-#endif /* signal stuff */
-
-    timeout = prepare_for_poll();
-
     /* poll until we have an event, or the second ends */
-    poll_result = tor_poll(poll_array, nfds, timeout);
+    loop_result = event_dispatch();
 
     /* let catch() handle things like ^c, and otherwise don't worry about it */
-    if (poll_result < 0) {
-      int e = tor_socket_errno(-1);
+    if (loop_result < 0) {
+      int e = errno;
       /* let the program survive things like ^z */
       if (e != EINTR) {
         log_fn(LOG_ERR,"poll failed: %s [%d]",
@@ -940,20 +1009,9 @@ static int do_main_loop(void) {
       }
     }
 
-    /* do all the reads and errors first, so we can detect closed sockets */
-    for (i=0;i<nfds;i++)
-      conn_read(i); /* this also marks broken connections */
-
-    /* then do the writes */
-    for (i=0;i<nfds;i++)
-      conn_write(i);
-
-    /* any of the conns need to be closed now? */
-    for (i=0;i<nfds;i++)
-      conn_close_if_marked(i);
-
     /* refilling buckets and sending cells happens at the beginning of the
      * next iteration of the loop, inside prepare_for_poll()
+     * XXXX No longer so.
      */
   }
 }
@@ -973,19 +1031,19 @@ control_signal_act(int the_signal)
   switch(the_signal)
     {
     case 1:
-      please_reset = 1;
+      signal_callback(0,0,(void*)SIGHUP);
       break;
     case 2:
-      please_shutdown = 1;
+      signal_callback(0,0,(void*)SIGINT);
       break;
     case 10:
-      please_dumpstats = 1;
+      signal_callback(0,0,(void*)SIGUSR1);
       break;
     case 12:
-      please_debug = 1;
+      signal_callback(0,0,(void*)SIGUSR2);
       break;
     case 15:
-      please_die = 1;
+      signal_callback(0,0,(void*)SIGTERM);
       break;
     default:
       return -1;
@@ -993,45 +1051,50 @@ control_signal_act(int the_signal)
   return 0;
 }
 
-/** Unix signal handler. */
-static void catch(int the_signal) {
-
-#ifndef MS_WINDOWS /* do signal stuff only on unix */
-  switch (the_signal) {
-//    case SIGABRT:
+static void signal_callback(int fd, short events, void *arg)
+{
+  int sig = (int) arg;
+  switch (sig)
+    {
     case SIGTERM:
-      please_die = 1;
+      log(LOG_ERR,"Catching signal TERM, exiting cleanly.");
+      tor_cleanup();
+      exit(0);
       break;
     case SIGINT:
-      please_shutdown = 1;
+      if (!server_mode(get_options())) { /* do it now */
+        log(LOG_NOTICE,"Interrupt: exiting cleanly.");
+        tor_cleanup();
+        exit(0);
+      }
+      hibernate_begin_shutdown();
       break;
+#ifdef SIGPIPE
     case SIGPIPE:
-      /* don't log here, since it's possible you got the sigpipe because
-       * your log failed! */
-      please_sigpipe = 1;
-      break;
-    case SIGHUP:
-      please_reset = 1;
+      log(LOG_NOTICE,"Caught sigpipe. Ignoring.");
       break;
+#endif
     case SIGUSR1:
-      please_dumpstats = 1;
+      /* prefer to log it at INFO, but make sure we always see it */
+      dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO);
       break;
     case SIGUSR2:
-      please_debug = 1;
+      switch_logs_debug();
+      log(LOG_NOTICE,"Caught USR2. Going to loglevel debug.");
+      break;
+    case SIGHUP:
+      if (do_hup() < 0) {
+        log_fn(LOG_WARN,"Restart failed (config error?). Exiting.");
+        tor_cleanup();
+        exit(1);
+      }
       break;
+#ifdef SIGCHLD
     case SIGCHLD:
-      please_reap_children = 1;
+      while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more zombies */
       break;
-#ifdef SIGXFSZ
-    case SIGXFSZ: /* this happens when write fails with etoobig */
-      break; /* ignore; write will fail and we'll look at errno. */
+    }
 #endif
-    default:
-      log(LOG_WARN,"Caught signal %d that we can't handle??", the_signal);
-      tor_cleanup();
-      exit(1);
-  }
-#endif /* signal stuff */
 }
 
 /** Write all statistics to the log, with log level 'severity'.  Called
@@ -1120,30 +1183,49 @@ static void exit_function(void)
 void handle_signals(int is_parent)
 {
 #ifndef MS_WINDOWS /* do signal stuff only on unix */
-  struct sigaction action;
-  action.sa_flags = 0;
-  sigemptyset(&action.sa_mask);
-
-  action.sa_handler = is_parent ? catch : SIG_IGN;
-  sigaction(SIGINT,  &action, NULL); /* do a controlled slow shutdown */
-  sigaction(SIGTERM, &action, NULL); /* to terminate now */
-  sigaction(SIGPIPE, &action, NULL); /* otherwise sigpipe kills us */
-  sigaction(SIGUSR1, &action, NULL); /* dump stats */
-  sigaction(SIGUSR2, &action, NULL); /* go to loglevel debug */
-  sigaction(SIGHUP,  &action, NULL); /* to reload config, retry conns, etc */
+  int i;
+  static int signals[] = {
+    SIGINT,
+    SIGTERM,
+    SIGPIPE,
+    SIGUSR1,
+    SIGUSR2,
+    SIGHUP,
 #ifdef SIGXFSZ
-  sigaction(SIGXFSZ, &action, NULL); /* handle file-too-big resource exhaustion */
+    SIGXFSZ,
+#endif
+    SIGCHLD,
+    -1 };
+  static struct event signal_events[16]; /* bigger than it has to be. */
+  if (is_parent) {
+    for (i = 0; signals[i] >= 0; ++i) {
+      signal_set(&signal_events[i], signals[i], signal_callback,
+                 (void*)signals[i]);
+      signal_add(&signal_events[i], NULL);
+    }
+  } else {
+    struct sigaction action;
+    action.sa_flags = 0;
+    sigemptyset(&action.sa_mask);
+    action.sa_handler = SIG_IGN;
+    sigaction(SIGINT,  &action, NULL); /* do a controlled slow shutdown */
+    sigaction(SIGTERM, &action, NULL); /* to terminate now */
+    sigaction(SIGPIPE, &action, NULL); /* otherwise sigpipe kills us */
+    sigaction(SIGUSR1, &action, NULL); /* dump stats */
+    sigaction(SIGUSR2, &action, NULL); /* go to loglevel debug */
+    sigaction(SIGHUP,  &action, NULL); /* to reload config, retry conns, etc */
+#ifdef SIGXFSZ
+    sigaction(SIGXFSZ, &action, NULL); /* handle file-too-big resource exhaustion */
 #endif
-  if (is_parent)
-    sigaction(SIGCHLD, &action, NULL); /* handle dns/cpu workers that exit */
 #endif /* signal stuff */
+  }
 }
 
 /** Main entry point for the Tor command-line client.
  */
 static int tor_init(int argc, char *argv[]) {
-
   time_of_process_start = time(NULL);
+  closeable_connection_lst = smartlist_create();
   /* Initialize the history structures. */
   rep_hist_init();
   /* Initialize the service cache. */
@@ -1159,6 +1241,8 @@ static int tor_init(int argc, char *argv[]) {
     return -1;
   }
   atexit(exit_function);
+  event_init(); /* This needs to happen before net stuff. Is it okay if this
+                 * happens before daemonizing? */
 
   if (init_from_config(argc,argv) < 0) {
     log_fn(LOG_ERR,"Reading config failed--see warnings above. For usage, try -h.");
@@ -1195,7 +1279,7 @@ void tor_cleanup(void) {
     accounting_record_bandwidth_usage(time(NULL));
 }
 
-/** Read/create keys as needed, and echo our fingerprint to stdout. */
+/** Read/craete keys as needed, and echo our fingerprint to stdout. */
 static void do_list_fingerprint(void)
 {
   char buf[FINGERPRINT_LEN+1];

+ 9 - 2
src/or/or.h

@@ -40,7 +40,6 @@
 #include <ctype.h>
 #endif
 #include "../common/torint.h"
-#include "../common/fakepoll.h"
 #ifdef HAVE_INTTYPES_H
 #include <inttypes.h>
 #endif
@@ -97,6 +96,11 @@
 #ifdef HAVE_TIME_H
 #include <time.h>
 #endif
+#ifdef HAVE_EVENT_H
+#include <event.h>
+#else
+#error "Tor requires libevent to build."
+#endif
 
 #ifdef MS_WINDOWS
 #if (_MSC_VER <= 1300)
@@ -494,7 +498,9 @@ struct connection_t {
                            * the bandwidth throttler allows reads?
                            */
   int s; /**< Our socket; -1 if this connection is closed. */
-  int poll_index; /**< Index of this conn into the poll_array. */
+  int poll_index; /* XXXX rename. */
+  struct event *read_event; /**< libevent event structure. */
+  struct event *write_event; /**< libevent event structure. */
   int marked_for_close; /**< Boolean: should we close this conn on the next
                          * iteration of the main loop?
                          */
@@ -1380,6 +1386,7 @@ void consider_hibernation(time_t now);
 int connection_add(connection_t *conn);
 int connection_remove(connection_t *conn);
 int connection_in_array(connection_t *conn);
+void add_connection_to_closeable_list(connection_t *conn);
 
 void get_connection_array(connection_t ***array, int *n);