12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325 |
- \documentclass[times,10pt,twocolumn]{article}
- \usepackage{latex8}
- %\usepackage{times}
- \usepackage{url}
- \usepackage{graphics}
- \usepackage{amsmath}
- \pagestyle{empty}
- \renewcommand\url{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- \newcommand\emailaddr{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- % If an URL ends up with '%'s in it, that's because the line *in the .bib/.tex
- % file* is too long, so break it there (it doesn't matter if the next line is
- % indented with spaces). -DH
- %\newif\ifpdf
- %\ifx\pdfoutput\undefined
- % \pdffalse
- %\else
- % \pdfoutput=1
- % \pdftrue
- %\fi
- \newenvironment{tightlist}{\begin{list}{$\bullet$}{
- \setlength{\itemsep}{0mm}
- \setlength{\parsep}{0mm}
- % \setlength{\labelsep}{0mm}
- % \setlength{\labelwidth}{0mm}
- % \setlength{\topsep}{0mm}
- }}{\end{list}}
- \begin{document}
- %% Use dvipdfm instead. --DH
- %\ifpdf
- % \pdfcompresslevel=9
- % \pdfpagewidth=\the\paperwidth
- % \pdfpageheight=\the\paperheight
- %\fi
- \title{Tor: Design of a Second-Generation Onion Router}
- %\author{Roger Dingledine \\ The Free Haven Project \\ arma@freehaven.net \and
- %Nick Mathewson \\ The Free Haven Project \\ nickm@freehaven.net \and
- %Paul Syverson \\ Naval Research Lab \\ syverson@itd.nrl.navy.mil}
- \maketitle
- \thispagestyle{empty}
- \begin{abstract}
- We present Tor, a connection-based low-latency anonymous communication
- system. Tor is the successor to Onion Routing
- and addresses many limitations in the original Onion Routing design.
- Tor works in a real-world Internet environment,
- % it's user-space too
- requires little synchronization or coordination between nodes, and
- protects against known anonymity-breaking attacks as well
- as or better than other systems with similar design parameters.
- % and we present a big list of open problems at the end
- \end{abstract}
- %\begin{center}
- %\textbf{Keywords:} anonymity, peer-to-peer, remailer, nymserver, reply block
- %\end{center}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Overview}
- \label{sec:intro}
- Onion Routing is a distributed overlay network designed to anonymize
- low-latency TCP-based applications such as web browsing, secure shell,
- and instant messaging. Clients choose a path through the network and
- build a \emph{virtual circuit}, in which each node in the path knows its
- predecessor and successor, but no others. Traffic flowing down the circuit
- is sent in fixed-size \emph{cells}, which are unwrapped by a symmetric key
- at each node (like the layers of an onion) and relayed downstream. The
- original Onion Routing project published several design and analysis
- papers
- \cite{or-jsac98,or-discex00,or-ih96,or-pet00}. While there was briefly
- a wide area Onion Routing network,
- % how long is briefly? a day, a month? -RD
- the only long-running and publicly accessible
- implementation was a fragile proof-of-concept that ran on a single
- machine. Many critical design and deployment issues were never implemented,
- and the design has not been updated in several years.
- Here we describe Tor, a protocol for asynchronous, loosely
- federated onion routers that provides the following improvements over
- the old Onion Routing design, and over other low-latency anonymity systems:
- \begin{tightlist}
- \item \textbf{Perfect forward secrecy:} The original Onion Routing
- design is vulnerable to a single hostile node recording traffic and later
- compromising successive nodes in the circuit and forcing them to
- decrypt it.
- Rather than using
- onions to lay the circuits, Tor uses an incremental or \emph{telescoping}
- path-building design, where the initiator negotiates session keys with
- each successive hop in the circuit. Onion replay detection is no longer
- necessary, and the process of building circuits is more reliable, since
- the initiator knows when a hop fails and can then try extending to a new node.
- % Perhaps mention that not all of these are things that we invented. -NM
- \item \textbf{Separation of protocol cleaning from anonymity:}
- The original Onion Routing design required a separate ``application
- proxy'' for each
- supported application protocol, resulting in a lot of extra code --- most
- of which was never written, so most applications were not supported.
- Tor uses the unified and standard Socks
- \cite{socks4,socks5} proxy interface, allowing us to support most TCP-based
- programs without modification. This design change allows Tor to
- use the protocol-normalization features of privacy-enhancing
- application-level proxies such as Privoxy without having to
- incorporate those features itself.
- \item \textbf{Many TCP streams can share one circuit:} The original
- Onion Routing design built one circuit for each application-level
- request.
- Aside from the performance issues of doing multiple public key
- operations for every request, building a circuit for each request can
- endanger anonymity.
- The very first Onion Routing design \cite{or-ih96} protected against
- this to some extent by hiding network access behind an onion
- router/firewall that was also forwarding traffic from other nodes.
- However, even if this meant complete protection, many users can
- benefit from Onion Routing for which neither running one's own node
- nor such firewall configurations are adequately convenient to be
- feasible. Those users, especially if they engage in certain unusual
- communication behaviors, may be identifiable \cite{wright03}. To
- complicate the possibility of such attacks Tor multiplexes many
- stream down each circuit, but still rotates the circuit
- periodically to avoid too much linkability from requests on a single
- circuit.
- \item \textbf{No mixing, padding, or traffic shaping:}
- The original Onion Routing
- design called for full link padding both between onion routers and between
- onion proxies (that is, users) and onion routers \cite{or-jsac98}. The
- later analysis paper \cite{or-pet00} suggested \emph{traffic shaping}
- to provide similar protection but use less bandwidth, but did not go
- into detail. However, recent research \cite{econymics} and deployment
- experience \cite{freedom21-security} suggest that this level of resource
- use is not practical or economical; and even full link padding is still
- vulnerable to active attacks \cite{defensive-dropping}.
- %[An upcoming FC04 paper. I'll add a cite when it's out. -RD]
- %Say that: although some less resource-heavy traffic shaping approaches may
- %help resist certain attacks, we aren't getting on the bandwagon yet? -NM.
- \item \textbf{Leaky-pipe circuit topology:} Through in-band
- signalling within the
- circuit, Tor initiators can direct traffic to nodes partway down the
- circuit. This allows for long-range padding to frustrate traffic
- shape and volume attacks at the initiator \cite{defensive-dropping},
- but because circuits are used by more than one application, it also
- allows traffic to exit the circuit from the middle -- thus
- frustrating traffic shape and volume attacks based on observing exit
- points.
- %Or something like that. hm. Tone this down maybe? Or support it. -RD
- %How's that? -PS
- \item \textbf{Congestion control:} Earlier anonymity designs do not
- address traffic bottlenecks. Unfortunately, typical approaches to load
- balancing and flow control in overlay networks involve inter-node control
- communication and global views of traffic. Our decentralized ack-based
- congestion control maintains reasonable anonymity while allowing nodes
- at the edges of the network to detect congestion or flooding attacks
- and send less data until the congestion subsides.
- \item \textbf{Directory servers:} Rather than attempting to flood
- link-state information through the network, which can be unreliable and
- open to partitioning attacks or outright deception, Tor takes a simplified
- view towards distributing link-state information. Certain more trusted
- onion routers also serve as directory servers; they provide signed
- \emph{directories} describing all routers they know about, and which
- are currently up. Users periodically download these directories via HTTP.
- \item \textbf{End-to-end integrity checking:} Without integrity checking
- on traffic going through the network, any onion router on the path
- can change the contents of cells as they pass by, e.g. to redirect a
- connection on the fly so it connects to a different webserver, or to
- tag encrypted traffic and look for the tagged traffic at the network
- edges \cite{minion-design}.
- \item \textbf{Robustness to failed nodes:} A failed node in a traditional
- mix network means lost messages, but thanks to Tor's step-by-step
- circuit building, users can notice failed
- nodes while building circuits and route around them. Additionally,
- liveness information from directories allows users to avoid
- unreliable node in the first place.
- %We further provide a
- %simple mechanism that allows connections to be established despite recent
- %node failure or slightly dated information from a directory server. Tor
- %permits onion routers to have \emph{router twins} --- nodes that share
- %the same private decryption key. Note that because connections now have
- %perfect forward secrecy, an onion router still cannot read the traffic
- %on a connection established through its twin even while that connection
- %is active. Also, which nodes are twins can change dynamically depending
- %on current circumstances, and twins may or may not be under the same
- %administrative authority.
- %
- %[Commented out; Router twins provide no real increase in robustness
- %to failed nodes. If a non-twinned node goes down, the
- %circuit-builder notices this and routes around it. Circuit-building
- %is offline, so there shouldn't even be a latency hit. -NM]
- \item \textbf{Variable exit policies:} Tor provides a consistent
- mechanism for
- each node to specify and advertise a policy describing the hosts and
- ports to which it will connect. These exit policies
- are critical in a volunteer-based distributed infrastructure, because
- each operator is comfortable with allowing different types of traffic
- to exit the Tor network from his node.
- \item \textbf{Implementable in user-space}.
- \item \textbf{Rendezvous points and location-protected servers:} Tor
- provides an integrated mechanism for responder-anonymity
- location-protected servers. [XXX say more.]
- [XXX Mention that reply onions are out because they're brittle don't give PFS.]
- \end{tightlist}
- [XXX carefully mention implementation, emphasizing that experience
- deploying isn't there yet, and not all features are implemented.
- Mention that it runs, is kinda alpha, kinda deployed, runs on win32.]
- We review previous work in Section \ref{sec:background}, describe
- our goals and assumptions in Section \ref{sec:assumptions},
- and then address the above list of improvements in Sections
- \ref{sec:design}-\ref{sec:maintaining-anonymity}. We then summarize
- how our design stands up to known attacks, and conclude with a list of
- open problems.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Background and threat model}
- \label{sec:background}
- \SubSection{Related work}
- \label{sec:related-work}
- Modern anonymity designs date to Chaum's Mix-Net\cite{chaum-mix} design of
- 1981. Chaum proposed hiding sender-recipient connections by wrapping
- messages in several layers of public key cryptography, and relaying them
- through a path composed of Mix servers. Mix servers in turn decrypt, delay,
- and re-order messages, before relay them along the path towards their
- destinations.
- Subsequent relay-based anonymity designs have diverged in two
- principal directions. Some have attempted to maximize anonymity at
- the cost of introducing comparatively large and variable latencies,
- for example, Babel\cite{babel}, Mixmaster\cite{mixmaster-spec}, and
- Mixminion\cite{minion-design}. Because of this
- trade-off, such \emph{high-latency} networks are well-suited for anonymous
- email, but introduce too much lag for interactive tasks such as web browsing,
- internet chat, or SSH connections.
- % Parts of this graf belongs later in expository order. Some of the
- % sentences seem superficially unrelated.
- Tor belongs to the second category: \emph{low-latency} designs that
- attempt to anonymize interactive network traffic. Because such
- traffic tends to involve a relatively large numbers of packets, it is
- difficult to prevent an attacker who can eavesdrop entry and exit
- points from correlating packets entering the anonymity network with
- packets leaving it. Although some work has been done to frustrate
- these attacks, most designs protect primarily against traffic analysis
- rather than traffic confirmation \cite{or-jsac98}. One can pad and
- limit communication to a constant rate or at least to control the
- variation in traffic shape. This can have prohibitive bandwidth costs
- and/or performance limitations. One can also use a cascade (fixed
- shared route) with a relatively fixed set of users. This assumes a
- significant degree of agreement and provides an easier target for an active
- attacker since the endpoints are generally known. However, a practical
- network with both of these features and thousands of active users has
- been run for many years (the Java Anon Proxy, aka Web MIXes,
- \cite{web-mix}).
- Another low latency design that was proposed independently and at
- about the same time as the original Onion Routing was PipeNet \cite{pipenet}.
- It provided anonymity protections that were stronger than Onion Routing's,
- but at the cost of allowing a single user to shut down the network simply
- by not sending. It was also never implemented or formally published.
- The simplest low-latency designs are single-hop proxies such as the
- Anonymizer \cite{anonymizer}, wherein a single trusted server removes
- identifying users' data before relaying it. These designs are easy to
- analyze, but require end-users to trust the anonymizing proxy.
- More complex are distributed-trust, channel-based anonymizing systems. In
- these designs, a user establishes one or more medium-term bidirectional
- end-to-end tunnels to exit servers, and uses those tunnels to deliver a
- number of low-latency packets to and from one or more destinations per
- tunnel. Establishing tunnels is comparatively expensive and typically
- requires public-key cryptography, whereas relaying packets along a tunnel is
- comparatively inexpensive. Because a tunnel crosses several servers, no
- single server can learn the user's communication partners.
- %[Ouch: We haven't said what an onion is yet, but we use the word here! -NM]
- Systems such as earlier versions of Freedom and the original Onion Routing
- build the anonymous channel all at once (using an onion).
- Later designs of Freedom and Tor as described herein build
- the channel in stages, as does AnonNet
- \cite{anonnet}. Amongst other things, this makes perfect forward
- secrecy feasible.
- Some systems, such as Crowds \cite{crowds-tissec}, do not rely on the
- changing appearance of packets to hide the path; rather they employ
- mechanisms so that an intermediary cannot be sure when it is
- receiving from/sending to the ultimate initiator. There is no public-key
- encryption needed for Crowds, but the responder and all data are
- visible to all nodes on the path so that anonymity of connection
- initiator depends on filtering all identifying information from the
- data stream. Crowds is also designed only for HTTP traffic.
- Hordes \cite{hordes-jcs} is based on Crowds but also uses multicast
- responses to hide the initiator. Herbivore \cite{herbivore} and
- P5 \cite{p5} go even further requiring broadcast.
- They each use broadcast in very different ways, and tradeoffs are made to
- make broadcast more practical. Both Herbivore and P5 are designed primarily
- for communication between communicating peers, although Herbivore
- permits external connections by requesting a peer to serve as a proxy.
- Allowing easy connections to nonparticipating responders or recipients
- is a practical requirement for many users, e.g., to visit
- nonparticipating Web sites or to exchange mail with nonparticipating
- recipients.
- Distributed-trust anonymizing systems differ in how they prevent attackers
- from controlling too many servers and thus compromising too many user paths.
- Some protocols rely on a centrally maintained set of well-known anonymizing
- servers. The current Tor design falls into this category.
- Others (such as Tarzan and MorphMix) allow unknown users to run
- servers, while using a limited resource (DHT space for Tarzan; IP space for
- MorphMix) to prevent an attacker from owning too much of the network.
- Crowds uses a centralized ``blender'' to enforce Crowd membership
- policy. For small crowds it is suggested that familiarity with all
- members is adequate. For large diverse crowds, limiting accounts in
- control of any one party is more difficult:
- ``(e.g., the blender administrator sets up an account for a user only
- after receiving a written, notarized request from that user) and each
- account to one jondo, and by monitoring and limiting the number of
- jondos on any one net- work (using IP address), the attacker would be
- forced to launch jondos using many different identities and on many
- different networks to succeed'' \cite{crowds-tissec}.
- Tor is not primarily designed for censorship resistance but rather
- for anonymous communication. However, Tor's rendezvous points, which
- enable connections between mutually anonymous entities, also
- facilitate connections to hidden servers. These building blocks to
- censorship resistance and other capabilities are described in
- Section~\ref{sec:rendezvous}. Location-hidden servers are an
- essential component for anonymous publishing systems such as
- Publius\cite{publius}, Free Haven\cite{freehaven-berk}, and
- Tangler\cite{tangler}.
- [XXX I'm considering the subsection as ended here for now. I'm leaving the
- following notes in case we want to revisit any of them. -PS]
- Channel-based anonymizing systems also differ in their use of dummy traffic.
- [XXX]
- Finally, several systems provide low-latency anonymity without channel-based
- communication. Crowds and [XXX] provide anonymity for HTTP requests; [...]
- [XXX Mention error recovery?]
- STILL NOT MENTIONED:
- isdn-mixes\\
- real-time mixes\\
- rewebbers\\
- cebolla\\
- [XXX Close by mentioning where Tor fits.]
- \Section{Design goals and assumptions}
- \label{sec:assumptions}
- \subsection{Goals}
- % Reformat this section like ``Adversary Model'' is formatted. -NM
- Like other low-latency anonymity designs, Tor seeks to frustrate
- attackers from linking communication partners, or from linking
- multiple communications to or from a single point. Within this
- main goal, however, several design considerations have directed
- Tor's evolution.
- First, we have tried to build a {\bf deployable} system. [XXX why?]
- This requirement precludes designs that are expensive to run (for
- example, by requiring more bandwidth than volunteers will easily
- provide); designs that place a heavy liability burden on operators
- (for example, by allowing attackers to implicate operators in illegal
- activities); and designs that are difficult or expensive to implement
- (for example, by requiring kernel patches to many operating systems,
- or ). [Only anon people need to run special software! Look at minion
- reviews]
- Second, the system must be {\bf usable}. A hard-to-use system has
- fewer users --- and because anonymity systems hide users among users, a
- system with fewer users provides less anonymity. Thus, usability is
- not only a convenience, but is a security requirement for anonymity
- systems. In order to be usable, Tor should with most of a
- user's unmodified aplication; shouldn't introduce prohibitive delays; and
- [XXX what else?].
- Third, the protocol must be {\bf extensible}, so that it can serve as
- a test-bed for future research in low-latency anonymity systems.
- (Note that while an extensible protocol benefits researchers, there is
- a danger that differing choices of extensions will render users
- distinguishable. Thus, implementations should not permit different
- protocol extensions to coexist in a single deployed network.)
- % We should mention that there's a specification someplace: the spec makes us
- % easier to extend too. -NM
- The protocol's design and security parameters must be {\bf
- conservative}. Additional features impose implementation and
- complexity costs. [XXX Say that we don't want to try to come up with
- speculative solutions to problems we don't KNOW how to solve? -NM]
- \subsection{Non-goals}
- In favoring conservative, deployable designs, we have explicitly
- deferred a number of goals --- not because they are not desirable in
- anonymity systems --- but because they are either solved
- elsewhere, or an area of active research without a generally accepted
- solution.
- Unlike Tarzan or Morphmix, Tor does not attempt to scale to completely
- decentralized peer-to-peer environments with thousands of short-lived
- servers, many of which may be controlled by an adversary.
- Tor does not claim to provide a definitive solution to end-to-end
- timing or intersection attacks for users who do not run their own
- Onion Routers.
- % Mention would-be approaches. -NM
- % Does that mean we do claim to solve intersection attack for
- % the enclave-firewall model? -RD
- Tor does not provide \emph{protocol normalization} like the Anonymizer or
- Privoxy. In order to provide client indistinguishibility for
- complex and variable protocols such as HTTP, Tor must be layered with
- a filtering proxy such as Privoxy. Similarly, Tor does not currently
- integrate tunneling for non-stream-based protocols; this too must be
- provided by an external service.
- Tor is not steganographic: it doesn't try to conceal which users are
- sending or receiving communications.
- \SubSection{Adversary Model}
- \label{subsec:adversary-model}
- Like all practical low-latency systems, Tor is not secure against a
- global passive adversary, which is the most commonly assumed adversary
- for analysis of theoretical anonymous communication designs. The adversary
- we assume
- is weaker than global with respect to distribution, but it is not
- merely passive.
- We assume a threat model that expands on that from \cite{or-pet00}.
- The basic adversary components we consider are:
- \begin{description}
- \item[Observer:] can observe a connection (e.g., a sniffer on an
- Internet router), but cannot initiate connections. Observations may
- include timing and/or volume of packets as well as appearance of
- individual packets (including headers and content).
- \item[Disrupter:] can delay (indefinitely) or corrupt traffic on a
- link. Can change all those things that an observer can observe up to
- the limits of computational ability (e.g., cannot forge signatures
- unless a key is compromised).
- \item[Hostile initiator:] can initiate (or destroy) connections with
- specific routes as well as vary the timing and content of traffic
- on the connections it creates. A special case of the disrupter with
- additional abilities appropriate to its role in forming connections.
- \item[Hostile responder:] can vary the traffic on the connections made
- to it including refusing them entirely, intentionally modifying what
- it sends and at what rate, and selectively closing them. Also a
- special case of the disrupter.
- \item[Key breaker:] can break the key used to encrypt connection
- initiation requests sent to a Tor-node.
- % Er, there are no long-term private decryption keys. They have
- % long-term private signing keys, and medium-term onion (decryption)
- % keys. Plus short-term link keys. Should we lump them together or
- % separate them out? -RD
- %
- % Hmmm, I was talking about the keys used to encrypt the onion skin
- % that contains the public DH key from the initiator. Is that what you
- % mean by medium-term onion key? (``Onion key'' used to mean the
- % session keys distributed in the onion, back when there were onions.)
- % Also, why are link keys short-term? By link keys I assume you mean
- % keys that neighbor nodes use to superencrypt all the stuff they send
- % to each other on a link. Did you mean the session keys? I had been
- % calling session keys short-term and everything else long-term. I
- % know I was being sloppy. (I _have_ written papers formalizing
- % concepts of relative freshness.) But, there's some questions lurking
- % here. First up, I don't see why the onion-skin encryption key should
- % be any shorter term than the signature key in terms of threat
- % resistance. I understand that how we update onion-skin encryption
- % keys makes them depend on the signature keys. But, this is not the
- % basis on which we should be deciding about key rotation. Another
- % question is whether we want to bother with someone who breaks a
- % signature key as a particular adversary. He should be able to do
- % nearly the same as a compromised tor-node, although they're not the
- % same. I reworded above, I'm thinking we should leave other concerns
- % for later. -PS
-
- \item{Hostile Tor node:] can arbitrarily manipulate the
- connections under its control, as well as creating new connections
- (that pass through itself).
- \end{description}
- All feasible adversaries can be composed out of these basic
- adversaries. This includes combinations such as one or more
- compromised Tor-nodes cooperating with disrupters of links on which
- those nodes are not adjacent, or such as combinations of hostile
- outsiders and link observers (who watch links between adjacent
- Tor-nodes). Note that one type of observer might be a Tor-node. This
- is sometimes called an honest-but-curious adversary. While an observer
- Tor-node will perform only correct protocol interactions, it might
- share information about connections and cannot be assumed to destroy
- session keys at end of a session. Note that a compromised Tor-node is
- stronger than any other adversary component in the sense that
- replacing a component of any adversary with a compromised Tor-node
- results in a stronger overall adversary (assuming that the compromised
- Tor-node retains the same signature keys and other private
- state-information as the component it replaces).
- In general we are more focused on traffic analysis attacks than
- traffic confirmation attacks. A user who runs a Tor proxy on his own
- machine, connects to some remote Tor-node and makes a connection to an
- open Internet site, such as a public web server, is vulnerable to
- traffic confirmation. That is, an active attacker who suspects that
- the particular client is communicating with the particular server will
- be able to confirm this if she can attack and observe both the
- connection between the Tor network and the client and that between the
- Tor network and the server. Even a purely passive attacker will be
- able to confirm if the timing and volume properties of the traffic on
- the connnection are unique enough. This is not to say that Tor offers
- no resistance to traffic confirmation; it does. We defer discussion
- of this point and of particular attacks until Section~\ref{sec:attacks},
- after we have described Tor in more detail. However, we note here some
- basic assumptions that affect the threat model.
- [XXX I think this next subsection should be cut, leaving its points
- for the attacks section. But I'm leaving it here for now. The above
- line refers to the immediately following SubSection.-PS]
- \SubSection{Known attacks against low-latency anonymity systems}
- \label{subsec:known-attacks}
- % Should be merged into ``Threat model'' and reiterated in Attacks. -NM
- We discuss each of these attacks in more detail below, along with the
- aspects of the Tor design that provide defense. We provide a summary
- of the attacks and our defenses against them in Section~\ref{sec:attacks}.
- Passive attacks:
- simple observation,
- timing correlation,
- size correlation,
- option distinguishability,
- Active attacks:
- key compromise,
- iterated subpoena,
- run recipient,
- run a hostile node,
- compromise entire path,
- selectively DOS servers,
- introduce timing into messages,
- directory attacks,
- tagging attacks,
- smear attacks,
- entrapment attacks
- \SubSection{Assumptions}
- % Should be merged into ``Threat model''.
- For purposes of this paper, we assume all directory servers are honest
- % No longer true, see subsec:dirservers below -RD
- and trusted. Perhaps more accurately, we assume that all users and
- nodes can perform their own periodic checks on information they have
- from directory servers and that all will always have access to at
- least one directory server that they trust and from which they obtain
- all directory information. Future work may include robustness
- techniques to cope with a minority dishonest servers.
- Somewhere between ten percent and twenty percent of nodes are assumed
- to be compromised. In some circumstances, e.g., if the Tor network is
- running on a hardened network where all operators have had
- background checks, the percent of compromised nodes might be much
- lower. It may be worthwhile to consider cases where many of the `bad'
- nodes are not fully compromised but simply (passive) observing
- adversaries or that some nodes have only had compromise of the keys
- that decrypt connection initiation requests. But, we assume for
- simplicity that `bad' nodes are compromised in the sense spelled out
- above. We assume that all adversary components, regardless of their
- capabilities are collaborating and are connected in an offline clique.
- We do not assume any hostile users, except in the context of
- % This sounds horrible. What do you mean we don't assume any hostile
- % users? Surely we can tolerate some? -RD
- rendezvous points. Nonetheless, we assume that users vary widely in
- both the duration and number of times they are connected to the Tor
- network. They can also be assumed to vary widely in the volume and
- shape of the traffic they send and receive.
- [XXX what else?]
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{The Tor Design}
- \label{sec:design}
- The Tor network is an overlay network; each node is called an onion router
- (OR). Onion routers run on normal computers without needing any special
- privileges. Each OR maintains a long-term TLS connection to every other
- OR (although we look at ways to relax this clique-topology assumption in
- section \ref{subsec:restricted-routes}). A subset of the ORs also act as
- directory servers, tracking which routers are currently in the network;
- see section \ref{subsec:dirservers} for directory server details. Users
- run local software called an onion proxy (OP) that fetches directories,
- establishes paths (called \emph{virtual circuits}) over the network,
- and handles connections from the user applications. Onion proxies accept
- TCP streams and multiplex them across the virtual circuit. The onion
- router on the other side of the circuit connects to the destinations of
- the TCP streams and relays data.
- Onion routers have three types of keys. The first key is the identity
- (signing) key. An OR uses this key to sign TLS certificates, to sign its
- router descriptor (a summary of its keys, address, bandwidth, exit policy,
- etc), and to sign directories if it is a directory server. Changing the
- identity key of a router is considered equivalent to creating a new
- router. The second key is the onion (decryption) key, which is used
- for decrypting requests from users to set up a circuit and negotiate
- ephemeral keys. Thirdly, each OR shares link keys (generated by TLS)
- with the other ORs it's connected to. We discuss rotating these keys in
- Section \ref{subsec:rotating-keys}.
- Section \ref{subsec:cells} discusses the structure of the fixed-size
- \emph{cells} that are the unit of communication in Tor. We describe
- in Section \ref{subsec:circuits} how circuits work, and how they are
- built, extended, truncated, and destroyed. Section \ref{subsec:tcp}
- discusses the process of opening TCP streams through Tor, and finally
- Section \ref{subsec:congestion} talks about congestion control and
- fairness issues.
- \SubSection{Cells}
- \label{subsec:cells}
- Traffic passes from node to node in fixed-size cells. Each cell is 256
- bytes, and consists of a header and a payload. The header includes the
- circuit identifier (ACI) which specifies which circuit the cell refers to
- (many circuits can be multiplexed over the single TCP connection between
- ORs or between an OP and an OR), and a command to describe what to do
- with the cell's payload. Cells are either control cells, meaning they are
- intended to be interpreted by the node that receives them, or relay cells,
- meaning they carry end-to-end stream data. Controls cells can be one of:
- \emph{padding} (currently used for keepalive, but can be used for link
- padding), \emph{create} or \emph{created} (to set up a new circuit),
- or \emph{destroy} (to tear down a circuit).
- Relay cells have an additional header (the relay header) after the
- cell header, which specifies the stream identifier (many streams can
- be multiplexed over a circuit), an end-to-end checksum for integrity
- checking, and a relay command. Relay commands can be one of: \emph{relay
- data} (for data flowing down the stream), \emph{relay begin} (to open a
- stream), \emph{relay end} (to close a stream), \emph{relay connected}
- (to notify the OP that a relay begin has succeeded), \emph{relay
- extend} and \emph{relay extended} (to extend the circuit by a hop,
- and to acknowledge), \emph{relay truncate} and \emph{relay truncated}
- (to tear down only part of the circuit, and to acknowledge), \emph{relay
- sendme} (used for congestion control), and \emph{relay drop} (used to
- implement long-range dummies).
- We will talk more about each of these cell types below.
- % Nick: should there have been a table here? -RD
- \SubSection{Circuits and streams}
- \label{subsec:circuits}
- While the original Onion Routing design built one circuit for each stream,
- Tor circuits can be used by many streams. Thus because circuits can
- take several tenths of a second to construct due to crypto and network
- latency, users construct circuits preemptively. Users build a new circuit
- periodically (currently every minute) if the previous one has been used,
- and expire old used circuits that are no longer in use. Thus even very
- active users spend a negligible amount of time and CPU in building
- circuits, but only a limited number of requests can be linked to each
- other by a given exit node.
- Users set up circuits incrementally, negotiating a symmetric key with
- each hop one at a time. To create a new circuit, the user (call her
- Alice) sends a \emph{create} cell to the first node in her chosen
- path. The payload is the first half of the Diffie-Hellman handshake,
- encrypted to the onion key of the OR (call him Bob). Bob responds with a
- \emph{created} cell with the second half of the DH handshake, along with
- a hash of $K=g^{xy}$. The goal is to get unilateral entity authentication
- (Alice knows she's handshaking with Bob, Bob doesn't care who it is ---
- recall that Alice has no key and is trying to remain anonymous) and
- unilateral key authentication (Alice and Bob agree on a key, and Alice
- knows Bob is the only other person who could know it). We also want
- perfect forward secrecy, key freshness, etc.
- \begin{equation}
- \begin{aligned}
- \mathrm{Alice} \rightarrow \mathrm{Bob}&: E_{PK_{Bob}}(g^x) \\
- \mathrm{Bob} \rightarrow \mathrm{Alice}&: g^y, H(K | \mathrm{``handshake''}) \\
- \end{aligned}
- \end{equation}
- Being able to prove knowledge of this $K$ shows both that it was Bob
- who received $g^x$, and that it was Bob who came up with $y$. We use
- PK encryption in the first step (rather than, eg, using the first two
- steps of STS, which has a signature in the second step) because we
- don't have enough room in a single cell for a public key and also a
- signature. Preliminary analysis with the NRL protocol analyzer shows
- the above protocol to be secure (including providing PFS) under the
- traditional Dolev-Yao model.
- % cite Cathy? -RD
- % did I use the buzzwords correctly? -RD
- To extend a circuit past the first hop, Alice sends a \emph{relay extend}
- cell to the last node in the circuit, specifying the address of the new
- OR and an encrypted $g^x$ for it. That node copies the half-handshake
- into a \emph{create} cell, and passes it to the new OR to extend the
- circuit. When it responds with a \emph{created} cell, the penultimate OR
- copies the payload into a \emph{relay extended} cell and passes it back.
- % Nick: please fix my "that OR" pronouns -RD
- Once Alice has established the circuit (so she shares a key with each
- OR on the circuit), she can send relay cells.
- %The stream ID in the relay header indicates to which stream the cell belongs.
- % Nick: should i include the above line?
- Alice can address each relay cell to any of the ORs on the circuit. To
- construct a relay cell destined for a given OR, she iteratively
- encrypts the cell payload (that is, the relay header and payload)
- with the symmetric key of each hop up to that node. Then, at each hop
- down the circuit, the OR decrypts the cell payload and checks whether
- it recognizes the stream ID. A stream ID is recognized either if it
- is an already open stream at that OR, or if it is equal to zero. The
- zero stream ID is treated specially, and is used for control messages,
- e.g. starting a new stream. If the stream ID is unrecognized, the OR
- sends the relay cell downstream. This \emph{leaky pipe} circuit design
- allows Alice's streams to exit at different ORs, for example to tolerate
- different exit policies, or to keep the ORs from knowing that two streams
- originate at the same person.
- To tear down a circuit, Alice sends a destroy control cell. Each OR
- in the circuit receives the destroy cell, closes all open streams on
- that circuit, and passes a new destroy cell forward. But since circuits
- can be built incrementally, they can also be torn down incrementally:
- Alice can send a relay truncate cell to a node along the circuit. That
- node will send a destroy cell forward, and reply with a relay truncated
- acknowledgement. Alice might truncate her circuit so she can extend it
- to different nodes without notifying the first few nodes (or somebody
- observing them) that she is changing her circuit. That is, nodes in the
- middle are not even aware that the circuit was truncated, because the
- relay cells are encrypted. Similarly, if a node on the circuit goes down,
- the adjacent node can send a relay truncated back to Alice. Thus the
- ``break a node and see which circuits go down'' attack is weakened.
- \SubSection{Tagging attacks on streams}
- end-to-end integrity checking. (Mention tagging.)
- \SubSection{Opening and closing streams}
- \label{subsec:tcp}
- Describe how TCP connections get opened. (Mention DNS issues)
- Descibe closing TCP connections and 2-END handshake to mirror TCP
- close handshake.
- \SubSection{Congestion control and fairness}
- \label{subsec:congestion}
- Even with bandwidth throttling, we still need to worry about congestion,
- either accidental or intentional. If a lot of people make circuits into
- the same node, and they all come out through the same connection, then
- that connection may become saturated (be unable to send out cells as
- quickly as it wants to). For example, an adversary can make a 'put'
- request through the onion routing network to a webserver he owns,
- and then refuse to read any of the bytes at the webserver end of the
- circuit. These bottlenecks can propagate back through the entire network,
- mucking up everything.
- Describe circuit-level and stream-level
- congestion control issues and solutions.
- Describe circuit-level and stream-level fairness issues; cite Marc's
- anonnet stuff.
- \Section{Other design decisions}
- \SubSection{Resource management and DoS prevention}
- Describe DoS prevention. cookies before tls begins, rate limiting of
- create cells, link-to-link rate limiting, etc.
- Mention twins, what the do, what they can't.
- How we should do sequencing and acking like TCP so that we can better
- tolerate lost data cells.
- Mention that designers have to choose what you send across your
- circuit: wrapped IP packets, wrapped stream data, etc. [Disspell
- TCP-over-TCP misconception.]
- Mention that OR-to-OR connections should be highly reliable. If
- they aren't, everything can stall.
- \SubSection{Exit policies and abuse}
- \label{subsec:exitpolicies}
- Exit abuse is a serious barrier to wide-scale Tor deployment --- we
- must block or limit attacks and other abuse that users can do through
- the Tor network.
- Each onion router's \emph{exit policy} describes to which external
- addresses and ports the router will permit stream connections. On one end
- of the spectrum are \emph{open exit} nodes that will connect anywhere;
- on the other end are \emph{middleman} nodes that only relay traffic to
- other Tor nodes, and \emph{private exit} nodes that only connect locally
- or to addresses internal to that node's organization.
- This private exit
- node configuration is more secure for clients --- the adversary cannot
- see plaintext traffic leaving the network (e.g. to a webserver), so he
- is less sure of Alice's destination. More generally, nodes can require
- a variety of forms of traffic authentication \cite{onion-discex00}.
- Most onnion routers will function as \emph{limited exits} that permit
- connections to the world at large, but restrict access to certain abuse-prone
- addresses and services.
- Tor offers more reliability than the high-latency fire-and-forget
- anonymous email networks, because the sender opens a TCP stream
- with the remote mail server and receives an explicit confirmation of
- acceptance. But ironically, the private exit node model works poorly for
- email, when Tor nodes are run on volunteer machines that also do other
- things, because it's quite hard to configure mail transport agents so
- normal users can send mail normally, but the Tor process can only deliver
- mail locally. Further, most organizations have specific hosts that will
- deliver mail on behalf of certain IP ranges; Tor operators must be aware
- of these hosts and consider putting them in the Tor exit policy.
- The abuse issues on closed (e.g. military) networks are very different
- from the abuse on open networks like the Internet. While these IP-based
- access controls are still commonplace on the Internet, on closed networks,
- nearly all participants will be honest, and end-to-end authentication
- can be assumed for anything important.
- Tor is harder than minion because tcp doesn't include an abuse
- address. you could reach inside the http stream and change the agent
- or something, but that's a very specific case and probably won't help
- much anyway.
- And volunteer nodes don't resolve to anonymizer.mit.edu so it never
- even occurs to people that it wasn't you.
- Preventing abuse of open exit nodes is an unsolved problem. Princeton's
- CoDeeN project \cite{darkside} gives us a glimpse of what we're in for.
- % This is more speculative than a description of our design.
- but their solutions, which mainly involve rate limiting and blacklisting
- nodes which do bad things, don't translate directly to Tor. Rate limiting
- still works great, but Tor intentionally separates sender from recipient,
- so it's hard to know which sender was the one who did the bad thing,
- without just making the whole network wide open.
- even limiting most nodes to allow http, ssh, and aim to exit and reject
- all other stuff is sketchy, because plenty of abuse can happen over
- port 80. but it's a very good start, because it blocks most things,
- and because people are more used to the concept of port 80 abuse not
- coming from the machine's owner.
- we could also run intrusion detection system (IDS) modules at each tor
- node, to dynamically monitor traffic streams for attack signatures. it
- can even react when it sees a signature by closing the stream. but IDS's
- don't actually work most of the time, and besides, how do you write a
- signature for "is sending a mean mail"?
- we should run a squid at each exit node, to provide comparable anonymity
- to private exit nodes for cache hits, to speed everything up, and to
- have a buffer for funny stuff coming out of port 80. we could similarly
- have other exit proxies for other protocols, like mail, to check
- delivered mail for being spam.
- A mixture of open and restricted exit nodes will allow the most
- flexibility for volunteers running servers. But while a large number
- of middleman nodes is useful to provide a large and robust network,
- a small number of exit nodes still simplifies traffic analysis because
- there are fewer nodes the adversary needs to monitor, and also puts a
- greater burden on the exit nodes.
- The JAP cascade model is really nice because they only need one node to
- take the heat per cascade. On the other hand, a hydra scheme could work
- better (it's still hard to watch all the clients).
- Discuss importance of public perception, and how abuse affects it.
- ``Usability is a security parameter''. ``Public Perception is also a
- security parameter.''
- Discuss smear attacks.
- \SubSection{Directory Servers}
- \label{subsec:dirservers}
- First-generation Onion Routing designs \cite{or-jsac98,freedom2-arch} did
- % is or-jsac98 the right cite here? what's our stock OR cite? -RD
- in-band network status updates: each router flooded a signed statement
- to its neighbors, which propagated it onward. But anonymizing networks
- have different security goals than typical link-state routing protocols.
- For example, we worry more about delays (accidental or intentional)
- that can cause different parts of the network to have different pictures
- of link-state and topology. We also worry about attacks to deceive a
- client about the router membership list, topology, or current network
- state. Such \emph{partitioning attacks} on client knowledge help an
- adversary with limited resources to efficiently deploy those resources
- when attacking a target.
- Instead, Tor uses a small group of redundant directory servers to
- track network topology and node state such as current keys and exit
- policies. The directory servers are normal onion routers, but there are
- only a few of them and they are more trusted. They listen on a separate
- port as an HTTP server, both so participants can fetch current network
- state and router lists (a \emph{directory}), and so other onion routers
- can upload their router descriptors.
- [[mention that descriptors are signed with long-term keys; ORs publish
- regularly to dirservers; policies for generating directories; key
- rotation (link, onion, identity); Everybody already know directory
- keys; how to approve new nodes (advogato, sybil, captcha (RTT));
- policy for handling connections with unknown ORs; diff-based
- retrieval; diff-based consesus; separate liveness from descriptor
- list]]
- Of course, a variety of attacks remain. An adversary who controls a
- directory server can track certain clients by providing different
- information --- perhaps by listing only nodes under its control
- as working, or by informing only certain clients about a given
- node. Moreover, an adversary without control of a directory server can
- still exploit differences among client knowledge. If Eve knows that
- node $M$ is listed on server $D_1$ but not on $D_2$, she can use this
- knowledge to link traffic through $M$ to clients who have queried $D_1$.
- Thus these directory servers must be synchronized and redundant. The
- software is distributed with the signature public key of each directory
- server, and directories must be signed by a threshold of these keys.
- The directory servers in Tor are modeled after those in Mixminion
- \cite{minion-design}, but our situation is easier. Firstly, we make the
- simplifying assumption that all participants agree on who the directory
- servers are. Secondly, Mixminion needs to predict node behavior ---
- that is, build a reputation system for guessing future performance of
- nodes based on past performance, and then figure out a way to build
- a threshold consensus of these predictions. Tor just needs to get a
- threshold consensus of the current state of the network.
- The threshold consensus can be reached with standard Byzantine agreement
- techniques \cite{castro-liskov}.
- % Should I just stop the section here? Is the rest crap? -RD
- But this library, while more efficient than previous Byzantine agreement
- systems, is still complex and heavyweight for our purposes: we only need
- to compute a single algorithm, and we do not require strict in-order
- computation steps. The Tor directory servers build a consensus directory
- through a simple four-round broadcast protocol. First, each server signs
- and broadcasts its current opinion to the other directory servers; each
- server then rebroadcasts all the signed opinions it has received. At this
- point all directory servers check to see if anybody's cheating. If so,
- directory service stops, the humans are notified, and that directory
- server is permanently removed from the network. Assuming no cheating,
- each directory server then computes a local algorithm on the set of
- opinions, resulting in a uniform shared directory. Then the servers sign
- this directory and broadcast it; and finally all servers rebroadcast
- the directory and all the signatures.
- The rebroadcast steps ensure that a directory server is heard by either
- all of the other servers or none of them (some of the links between
- directory servers may be down). Broadcasts are feasible because there
- are so few directory servers (currently 3, but we expect to use as many
- as 9 as the network scales). The actual local algorithm for computing
- the shared directory is straightforward, and is described in the Tor
- specification \cite{tor-spec}.
- % we should, uh, add this to the spec. oh, and write it. -RD
- Using directory servers rather than flooding approaches provides
- simplicity and flexibility. For example, they don't complicate
- the analysis when we start experimenting with non-clique network
- topologies. And because the directories are signed, they can be cached at
- all the other onion routers (or even elsewhere). Thus directory servers
- are not a performance bottleneck when we have many users, and also they
- won't aid traffic analysis by forcing clients to periodically announce
- their existence to any central point.
- % Mention Hydra as an example of non-clique topologies. -NM, from RD
- \Section{Rendezvous points: location privacy}
- \label{sec:rendezvous}
- Rendezvous points are a building block for \emph{location-hidden services}
- (aka responder anonymity) in the Tor network. Location-hidden
- services means Bob can offer a tcp service, such as a webserver,
- without revealing the IP of that service.
- We provide this censorship resistance for Bob by allowing him to
- advertise several onion routers (his \emph{Introduction Points}) as his
- public location. Alice, the client, chooses a node for her \emph{Meeting
- Point}. She connects to one of Bob's introduction points, informs him
- about her meeting point, and then waits for him to connect to the meeting
- point. This extra level of indirection means Bob's introduction points
- don't open themselves up to abuse by serving files directly, eg if Bob
- chooses a node in France to serve material distateful to the French. The
- extra level of indirection also allows Bob to respond to some requests
- and ignore others.
- We provide the necessary glue so that Alice can view webpages from Bob's
- location-hidden webserver with minimal invasive changes. Both Alice and
- Bob must run local onion proxies.
- The steps of a rendezvous:
- \begin{tightlist}
- \item Bob chooses some Introduction Points, and advertises them on a
- Distributed Hash Table (DHT).
- \item Bob establishes onion routing connections to each of his
- Introduction Points, and waits.
- \item Alice learns about Bob's service out of band (perhaps Bob told her,
- or she found it on a website). She looks up the details of Bob's
- service from the DHT.
- \item Alice chooses and establishes a Meeting Point (MP) for this
- transaction.
- \item Alice goes to one of Bob's Introduction Points, and gives it a blob
- (encrypted for Bob) which tells him about herself, the Meeting Point
- she chose, and the first half of an ephemeral key handshake. The
- Introduction Point sends the blob to Bob.
- \item Bob chooses whether to ignore the blob, or to onion route to MP.
- Let's assume the latter.
- \item MP plugs together Alice and Bob. Note that MP can't recognize Alice,
- Bob, or the data they transmit (they share a session key).
- \item Alice sends a Begin cell along the circuit. It arrives at Bob's
- onion proxy. Bob's onion proxy connects to Bob's webserver.
- \item Data goes back and forth as usual.
- \end{tightlist}
- When establishing an introduction point, Bob provides the onion router
- with a public ``introduction'' key. The hash of this public key
- identifies a unique service, and (since Bob is required to sign his
- messages) prevents anybody else from usurping Bob's introduction point
- in the future. Bob uses the same public key when establishing the other
- introduction points for that service.
- The blob that Alice gives the introduction point includes a hash of Bob's
- public key to identify the service, an optional initial authentication
- token (the introduction point can do prescreening, eg to block replays),
- and (encrypted to Bob's public key) the location of the meeting point,
- a meeting cookie Bob should tell the meeting point so he gets connected to
- Alice, an optional authentication token so Bob can choose whether to respond,
- and the first half of a DH key exchange. When Bob connects to the meeting
- place and gets connected to Alice's pipe, his first cell contains the
- other half of the DH key exchange.
- % briefly talk about our notion of giving cookies to people proportional
- % to how important they are, for location-protected servers hardened
- % against DDoS threat? -RD
- \subsection{Integration with user applications}
- For each service Bob offers, he configures his local onion proxy to know
- the local IP and port of the server, a strategy for authorizating Alices,
- and a public key. We assume the existence of a robust decentralized
- efficient lookup system which allows authenticated updates, eg
- \cite{cfs:sosp01}. (Each onion router could run a node in this lookup
- system; also note that as a stopgap measure, we can just run a simple
- lookup system on the directory servers.) Bob publishes into the DHT
- (indexed by the hash of the public key) the public key, an expiration
- time (``not valid after''), and the current introduction points for that
- service. Note that Bob's webserver is completely oblivious to the fact
- that it's hidden behind the Tor network.
- As far as Alice's experience goes, we require that her client interface
- remain a SOCKS proxy, and we require that she shouldn't have to modify
- her applications. Thus we encode all of the necessary information into
- the hostname (more correctly, fully qualified domain name) that Alice
- uses, eg when clicking on a url in her browser. Location-hidden services
- use the special top level domain called `.onion': thus hostnames take the
- form x.y.onion where x encodes the hash of PK, and y is the authentication
- cookie. Alice's onion proxy examines hostnames and recognizes when they're
- destined for a hidden server. If so, it decodes the PK and starts the
- rendezvous as described in the table above.
- \subsection{Previous rendezvous work}
- Ian Goldberg developed a similar notion of rendezvous points for
- low-latency anonymity systems \cite{ian-thesis}. His ``service tag''
- is the same concept as our ``hash of service's public key''. We make it
- a hash of the public key so it can be self-authenticating, and so the
- client can recognize the same service with confidence later on. His
- design differs from ours in the following ways though. Firstly, Ian
- suggests that the client should manually hunt down a current location of
- the service via Gnutella; whereas our use of the DHT makes lookup faster,
- more robust, and transparent to the user. Secondly, the client and server
- can share ephemeral DH keys, so at no point in the path is the plaintext
- exposed. Thirdly, our design is much more practical for deployment in a
- volunteer network, in terms of getting volunteers to offer introduction
- and meeting point services. The introduction points do not output any
- bytes to the clients. And the meeting points don't know the client,
- the server, or the stuff being transmitted. The indirection scheme
- is also designed with authentication/authorization in mind -- if the
- client doesn't include the right cookie with its request for service,
- the server doesn't even acknowledge its existence.
- \Section{Analysis}
- How well do we resist chosen adversary?
- How well do we meet stated goals?
- Mention jurisdictional arbitrage.
- Pull attacks and defenses into analysis as a subsection
- \Section{Maintaining anonymity in Tor}
- \label{sec:maintaining-anonymity}
- [Put as much of this as a part of open issues as is possible.]
- [what's an anonymity set?]
- packet counting attacks work great against initiators. need to do some
- level of obfuscation for that. standard link padding for passive link
- observers. long-range padding for people who own the first hop. are
- we just screwed against people who insert timing signatures into your
- traffic?
- Even regardless of link padding from Alice to the cloud, there will be
- times when Alice is simply not online. Link padding, at the edges or
- inside the cloud, does not help for this.
- how often should we pull down directories? how often send updated
- server descs?
- when we start up the client, should we build a circuit immediately,
- or should the default be to build a circuit only on demand? should we
- fetch a directory immediately?
- would we benefit from greater synchronization, to blend with the other
- users? would the reduced speed hurt us more?
- does the "you can't see when i'm starting or ending a stream because
- you can't tell what sort of relay cell it is" idea work, or is just
- a distraction?
- does running a server actually get you better protection, because traffic
- coming from your node could plausibly have come from elsewhere? how
- much mixing do you need before this is actually plausible, or is it
- immediately beneficial because many adversary can't see your node?
- do different exit policies at different exit nodes trash anonymity sets,
- or not mess with them much?
- do we get better protection against a realistic adversary by having as
- many nodes as possible, so he probably can't see the whole network,
- or by having a small number of nodes that mix traffic well? is a
- cascade topology a more realistic way to get defenses against traffic
- confirmation? does the hydra (many inputs, few outputs) topology work
- better? are we going to get a hydra anyway because most nodes will be
- middleman nodes?
- using a circuit many times is good because it's less cpu work.
- good because of predecessor attacks with path rebuilding.
- bad because predecessor attacks can be more likely to link you with a
- previous circuit since you're so verbose.
- bad because each thing you do on that circuit is linked to the other
- things you do on that circuit.
- how often to rotate?
- how to decide when to exit from middle?
- when to truncate and re-extend versus when to start new circuit?
- Because Tor runs over TCP, when one of the servers goes down it seems
- that all the circuits (and thus streams) going over that server must
- break. This reduces anonymity because everybody needs to reconnect
- right then (does it? how much?) and because exit connections all break
- at the same time, and it also reduces usability. It seems the problem
- is even worse in a p2p environment, because so far such systems don't
- really provide an incentive for nodes to stay connected when they're
- done browsing, so we would expect a much higher churn rate than for
- onion routing. Are there ways of allowing streams to survive the loss
- of a node in the path?
- discuss topologies. Cite George's non-freeroutes paper. Maybe this
- graf goes elsewhere.
- discuss attracting users; incentives; usability.
- Choosing paths and path lengths.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Attacks and Defenses}
- \label{sec:attacks}
- Below we summarize a variety of attacks and how well our design withstands
- them.
- \begin{enumerate}
- \item \textbf{Passive attacks}
- \begin{itemize}
- \item \emph{Simple observation.}
- \item \emph{Timing correlation.}
- \item \emph{Size correlation.}
- \item \emph{Option distinguishability.}
- \end{itemize}
- \item \textbf{Active attacks}
- \begin{itemize}
- \item \emph{Key compromise.}
- \item \emph{Iterated subpoena.}
- \item \emph{Run recipient.}
- \item \emph{Run a hostile node.}
- \item \emph{Compromise entire path.}
- \item \emph{Selectively DoS servers.}
- \item \emph{Introduce timing into messages.}
- \item \emph{Tagging attacks.}
- the exit node can change the content you're getting to try to
- trick you. similarly, when it rejects you due to exit policy,
- it could give you a bad IP that sends you somewhere else.
- \end{itemize}
- we rely on DNS being globally consistent. if people in africa resolve
- IPs differently, then asking to extend a circuit to a certain IP can
- give away your origin.
- \item \textbf{Directory attacks}
- \begin{itemize}
- \item knock out a dirserver
- \item knock out half the dirservers
- \item trick user into using different software (with different dirserver
- keys)
- \item OR connects to the dirservers but nowhere else
- \item foo
- \end{itemize}
- \item \textbf{Attacks against rendezvous points}
- \begin{itemize}
- \item foo
- \end{itemize}
- \end{enumerate}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Future Directions and Open Problems}
- \label{sec:conclusion}
- % Mention that we need to do TCP over tor for reliability.
- Tor brings together many innovations into
- a unified deployable system. But there are still several attacks that
- work quite well, as well as a number of sustainability and run-time
- issues remaining to be ironed out. In particular:
- \begin{itemize}
- \item \emph{Scalability:} Since Tor's emphasis currently is on simplicity
- of design and deployment, the current design won't easily handle more
- than a few hundred servers, because of its clique topology. Restricted
- route topologies \cite{danezis-pets03} promise comparable anonymity
- with much better scaling properties, but we must solve problems like
- how to randomly form the network without introducing net attacks.
- % [cascades are a restricted route topology too. we must mention
- % earlier why we're not satisfied with the cascade approach.]-RD
- % [We do. At least
- \item \emph{Cover traffic:} Currently we avoid cover traffic because
- it introduces clear performance and bandwidth costs, but and its
- security properties are not well understood. With more research
- \cite{SS03,defensive-dropping}, the price/value ratio may change, both for
- link-level cover traffic and also long-range cover traffic. In particular,
- we expect restricted route topologies to reduce the cost of cover traffic
- because there are fewer links to cover.
- \item \emph{Better directory distribution:} Even with the threshold
- directory agreement algorithm described in \ref{subsec:dirservers},
- the directory servers are still trust bottlenecks. We must find more
- decentralized yet practical ways to distribute up-to-date snapshots of
- network status without introducing new attacks.
- \item \emph{Implementing location-hidden servers:} While Section
- \ref{sec:rendezvous} provides a design for rendezvous points and
- location-hidden servers, this feature has not yet been implemented.
- We will likely encounter additional issues, both in terms of usability
- and anonymity, that must be resolved.
- \item \emph{Wider-scale deployment:} The original goal of Tor was to
- gain experience in deploying an anonymizing overlay network, and learn
- from having actual users. We are now at the point where we can start
- deploying a wider network. We will see what happens!
- % ok, so that's hokey. fix it. -RD
- \item \emph{Further specification review:} Foo.
- \end{itemize}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %\Section{Acknowledgments}
- %% commented out for anonymous submission
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \bibliographystyle{latex8}
- \bibliography{tor-design}
- \end{document}
- % Style guide:
- % U.S. spelling
- % avoid contractions (it's, can't, etc.)
- % 'mix', 'mixes' (as noun)
- % 'mix-net'
- % 'mix', 'mixing' (as verb)
- % 'Mixminion Project'
- % 'Mixminion' (meaning the protocol suite or the network)
- % 'Mixmaster' (meaning the protocol suite or the network)
- % 'middleman' [Not with a hyphen; the hyphen has been optional
- % since Middle English.]
- % 'nymserver'
- % 'Cypherpunk', 'Cypherpunks', 'Cypherpunk remailer'
- % 'Onion Routing design', 'onion router' [note capitalization]
- %
- % 'Whenever you are tempted to write 'Very', write 'Damn' instead, so
- % your editor will take it out for you.' -- Misquoted from Mark Twain
|