12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871 |
- \documentclass[times,10pt,twocolumn]{article}
- \usepackage{latex8}
- \usepackage{times}
- \usepackage{url}
- \usepackage{graphics}
- \usepackage{amsmath}
- \pagestyle{empty}
- \renewcommand\url{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- \newcommand\emailaddr{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- % If an URL ends up with '%'s in it, that's because the line *in the .bib/.tex
- % file* is too long, so break it there (it doesn't matter if the next line is
- % indented with spaces). -DH
- %\newif\ifpdf
- %\ifx\pdfoutput\undefined
- % \pdffalse
- %\else
- % \pdfoutput=1
- % \pdftrue
- %\fi
- \newenvironment{tightlist}{\begin{list}{$\bullet$}{
- \setlength{\itemsep}{0mm}
- \setlength{\parsep}{0mm}
- % \setlength{\labelsep}{0mm}
- % \setlength{\labelwidth}{0mm}
- % \setlength{\topsep}{0mm}
- }}{\end{list}}
- \begin{document}
- %% Use dvipdfm instead. --DH
- %\ifpdf
- % \pdfcompresslevel=9
- % \pdfpagewidth=\the\paperwidth
- % \pdfpageheight=\the\paperheight
- %\fi
- \title{Tor: Design of a Second-Generation Onion Router}
- %\author{Roger Dingledine \\ The Free Haven Project \\ arma@freehaven.net \and
- %Nick Mathewson \\ The Free Haven Project \\ nickm@freehaven.net \and
- %Paul Syverson \\ Naval Research Lab \\ syverson@itd.nrl.navy.mil}
- \maketitle
- \thispagestyle{empty}
- \begin{abstract}
- We present Tor, a circuit-based low-latency anonymous communication
- system. Tor is the successor to Onion Routing
- and addresses many limitations in the original Onion Routing design.
- Tor works in a real-world Internet environment,
- % it's user-space too
- requires little synchronization or coordination between nodes, and
- provides a reasonable tradeoff between anonymity and usability/efficiency
- %protects against known anonymity-breaking attacks as well
- %as or better than other systems with similar design parameters.
- % and we present a big list of open problems at the end
- % and we present a new practical design for rendezvous points
- \end{abstract}
- %\begin{center}
- %\textbf{Keywords:} anonymity, peer-to-peer, remailer, nymserver, reply block
- %\end{center}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Overview}
- \label{sec:intro}
- Onion Routing is a distributed overlay network designed to anonymize
- low-latency TCP-based applications such as web browsing, secure shell,
- and instant messaging. Clients choose a path through the network and
- build a \emph{virtual circuit}, in which each node (or ``onion router'')
- in the path knows its
- predecessor and successor, but no others. Traffic flowing down the circuit
- is sent in fixed-size \emph{cells}, which are unwrapped by a symmetric key
- at each node (like the layers of an onion) and relayed downstream. The
- original Onion Routing project published several design and analysis
- papers
- \cite{or-ih96,or-jsac98,or-discex00,or-pet00}. While
- a wide area Onion Routing network was deployed for several weeks,
- the only long-running and publicly accessible
- implementation was a fragile proof-of-concept that ran on a single
- machine.
- % (which nonetheless processed several tens of thousands of connections
- %daily from thousands of global users).
- %%Do we really want to say this? It softens our motivation for the paper. -RD
- Many critical design and deployment issues were never resolved,
- and the design has not been updated in several years.
- Here we describe Tor, a protocol for asynchronous, loosely
- federated onion routers that provides the following improvements over
- the old Onion Routing design:
- \begin{tightlist}
- \item \textbf{Perfect forward secrecy:} The original Onion Routing
- design was vulnerable to a single hostile node recording traffic and later
- compromising successive nodes in the circuit and forcing them to
- decrypt it.
- Rather than using a single onion to lay each circuit,
- Tor now uses an incremental or \emph{telescoping}
- path-building design, where the initiator negotiates session keys with
- each successive hop in the circuit. Once these keys are deleted,
- subsequently compromised nodes cannot decrypt old traffic.
- As a side benefit, onion replay detection is no longer
- necessary, and the process of building circuits is more reliable, since
- the initiator knows when a hop fails and can then try extending to a new node.
- % Perhaps mention that not all of these are things that we invented. -NM
- \item \textbf{Separation of protocol cleaning from anonymity:}
- The original Onion Routing design required a separate ``application
- proxy'' for each
- supported application protocol---most
- of which were never written, so many applications were never supported.
- Tor uses the standard and near-ubiquitous SOCKS
- \cite{socks4,socks5} proxy interface, allowing us to support most TCP-based
- programs without modification. This design change allows Tor to
- use the filtering features of privacy-enhancing
- application-level proxies such as Privoxy \cite{privoxy} without having to
- incorporate those features itself.
- \item \textbf{Many TCP streams can share one circuit:} The original
- Onion Routing design built a separate circuit for each application-level
- request.
- This hurt performance by requiring multiple public key operations for
- every request, and also presented
- a threat to anonymity from building so many different circuits; see
- Section~\ref{sec:maintaining-anonymity}.
- Tor multiplexes multiple TCP streams along each virtual
- circuit, to improve efficiency and anonymity.
- \item \textbf{No mixing, padding, or traffic shaping:} The original
- Onion Routing design called for batching and reordering the cells arriving
- from each circuit,
- plus full link padding between onion routers and between onion
- proxies (that is, users) and onion routers \cite{or-jsac98}. The
- later analysis paper \cite{or-pet00} theorized \emph{traffic shaping}
- that provides similar protection but use less bandwidth, but did not
- provide details. However, recent research \cite{econymics} and deployment
- experience \cite{freedom21-security} suggest that this level of resource
- use is not practical or economical; and even full link padding is still
- vulnerable \cite{defensive-dropping}. Thus, until we have a proven and
- convenient design for traffic shaping or low-latency mixing that
- will improve anonymity against a realistic adversary, we leave these
- strategies out.
- \item \textbf{Leaky-pipe circuit topology:} Through in-band
- signalling within the
- circuit, Tor initiators can direct traffic to nodes partway down the
- circuit. This allows for long-range padding to frustrate traffic
- shape and volume attacks at the initiator \cite{defensive-dropping}.
- Because circuits are used by more than one application, it also
- allows traffic to exit the circuit from the middle---thus
- frustrating traffic shape and volume attacks based on observing the
- end of the circuit.
- \item \textbf{Congestion control:} Earlier anonymity designs do not
- address traffic bottlenecks. Unfortunately, typical approaches to load
- balancing and flow control in overlay networks involve inter-node control
- communication and global views of traffic. Tor's decentralized congestion
- control uses end-to-end acks to maintain reasonable anonymity while
- allowing nodes
- at the edges of the network to detect congestion or flooding attacks
- and send less data until the congestion subsides.
- \item \textbf{Directory servers:} The original Onion Routing design
- planned to flood link-state information through the network---an
- approach which can be unreliable and
- open to partitioning attacks or outright deception. Tor takes a simplified
- view towards distributing link-state information. Certain more trusted
- onion routers also act as directory servers: they provide signed
- \emph{directories} which describe the routers they know about and mark
- those that
- are currently up. Users periodically download these directories via HTTP.
- \item \textbf{End-to-end integrity checking:} The original Onion Routing
- design did no integrity checking on data. Any onion router on the circuit
- could change the contents of cells as they pass by---for example, to
- redirect a
- connection on the fly so it connects to a different webserver, or to
- tag encrypted traffic and look for the tagged traffic at the network
- edges \cite{minion-design}. Tor hampers these attacks by checking data
- integrity before it leaves the network.
- \item \textbf{Robustness to failed nodes:} A failed node in the old design
- meant that circuit-building failed, but thanks to Tor's step-by-step
- circuit building, users can notice failed
- nodes while building circuits and route around them. Additionally,
- liveness information from directories allows users to avoid
- unreliable nodes in the first place.
- %We further provide a
- %simple mechanism that allows connections to be established despite recent
- %node failure or slightly dated information from a directory server. Tor
- %permits onion routers to have \emph{router twins} --- nodes that share
- %the same private decryption key. Note that because connections now have
- %perfect forward secrecy, an onion router still cannot read the traffic
- %on a connection established through its twin even while that connection
- %is active. Also, which nodes are twins can change dynamically depending
- %on current circumstances, and twins may or may not be under the same
- %administrative authority.
- %
- %[Commented out; Router twins provide no real increase in robustness
- %to failed nodes. If a non-twinned node goes down, the
- %circuit-builder notices this and routes around it. Circuit-building
- %is offline, so there shouldn't even be a latency hit. -NM]
- \item \textbf{Variable exit policies:} Tor provides a consistent
- mechanism for
- each node to specify and advertise a policy describing the hosts and
- ports to which it will connect. These exit policies
- are critical in a volunteer-based distributed infrastructure, because
- each operator is comfortable with allowing different types of traffic
- to exit the Tor network from his node.
- \item \textbf{Implementable in user-space:} Unlike other anonymity systems
- like Freedom \cite{freedom2-arch}, Tor only attempts to anonymize TCP
- streams. Thus it does not require patches to an operating system's network
- stack (or built-in support) to operate. Although this approach is less
- flexible, it has proven valuable to Tor's portability and deployability.
- \item \textbf{Rendezvous points and location-protected servers:}
- Tor provides an integrated mechanism for responder anonymity via
- location-protected servers. Previous Onion Routing designs included
- long-lived ``reply onions'' which could be used to build virtual circuits
- to a hidden server, but a reply onion becomes useless if any node in
- the path goes down or rotates its keys, and it also does not provide
- forward security. In Tor's current design, clients negotiate {\it
- rendezvous points} to connect with hidden servers; reply onions are no
- longer required.
- \end{tightlist}
- We have implemented most of the above features. Our source code is
- available under a free license, and is not encumbered by patents. We have
- recently begun deploying a widespread alpha network to see how well the
- design works in practice, to get more experience with usability and users,
- and to provide a research platform for experimenting with new ideas.
- We review previous work in Section~\ref{sec:related-work}, describe
- our goals and assumptions in Section~\ref{sec:assumptions},
- and then address the above list of improvements in
- Sections~\ref{sec:design}-\ref{sec:rendezvous}. We
- summarize in Section \ref{sec:analysis}
- how our design stands up to known attacks, and conclude with a list of
- open problems.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Related work}
- \label{sec:related-work}
- Modern anonymity designs date to Chaum's Mix-Net\cite{chaum-mix} design of
- 1981. Chaum proposed hiding sender-recipient linkability by wrapping
- messages in layers of public key cryptography, and relaying them
- through a path composed of ``Mixes.'' These mixes in turn decrypt, delay,
- and re-order messages, before relaying them along the sender-selected
- path towards their destinations.
- Subsequent relay-based anonymity designs have diverged in two
- principal directions. Some have attempted to maximize anonymity at
- the cost of introducing comparatively large and variable latencies,
- for example, Babel\cite{babel}, Mixmaster\cite{mixmaster-spec}, and
- Mixminion\cite{minion-design}. Because of this
- trade-off, these \emph{high-latency} networks are well-suited for anonymous
- email, but introduce too much lag for interactive tasks such as web browsing,
- internet chat, or SSH connections.
- Tor belongs to the second category: \emph{low-latency} designs that attempt
- to anonymize interactive network traffic. Because these protocols typically
- involve a large number of packets that must be delivered quickly, it is
- difficult for them to prevent an attacker who can eavesdrop both ends of the
- communication from correlating the timing and volume
- of traffic entering the anonymity network with traffic leaving it. These
- protocols are also vulnerable against active attacks in which an
- adversary introduces timing patterns into traffic entering the network, and
- looks
- for correlated patterns among exiting traffic.
- Although some work has been done to frustrate
- these attacks,\footnote{
- The most common approach is to pad and limit communication to a constant
- rate, or to limit
- the variation in traffic shape. Doing so can have prohibitive bandwidth
- costs and/or performance limitations.
- %One can also use a cascade (fixed
- %shared route) with a relatively fixed set of users. This assumes a
- %significant degree of agreement and provides an easier target for an active
- %attacker since the endpoints are generally known.
- } most designs protect primarily against traffic analysis rather than traffic
- confirmation \cite{or-jsac98}---that is, they assume that the attacker is
- attempting to learn who is talking to whom, not to confirm a prior suspicion
- about who is talking to whom.
- The simplest low-latency designs are single-hop proxies such as the
- Anonymizer \cite{anonymizer}, wherein a single trusted server strips the
- data's origin before relaying it. These designs are easy to
- analyze, but require end-users to trust the anonymizing proxy.
- Concentrating the traffic to a single point increases the anonymity set
- (the set of people a given user is hiding among), but it can make traffic
- analysis easier: an adversary need only eavesdrop on the proxy to observe
- the entire system.
- More complex are distributed-trust, circuit-based anonymizing systems. In
- these designs, a user establishes one or more medium-term bidirectional
- end-to-end tunnels to exit servers, and uses those tunnels to deliver
- low-latency packets to and from one or more destinations per
- tunnel. %XXX reword
- Establishing tunnels is expensive and typically
- requires public-key cryptography, whereas relaying packets along a tunnel is
- comparatively inexpensive. Because a tunnel crosses several servers, no
- single server can link a user to her communication partners.
- In some distributed-trust systems, such as the Java Anon Proxy (also known
- as JAP or Web MIXes), users build their tunnels along a fixed shared route
- or \emph{cascade}. As with a single-hop proxy, this approach aggregates
- users into larger anonymity sets, but again an attacker only needs to
- observe both ends of the cascade to bridge all the system's traffic.
- The Java Anon Proxy's design seeks to prevent this by padding
- between end users and the head of the cascade \cite{web-mix}. However, the
- current implementation does no padding and thus remains vulnerable
- to both active and passive bridging.
- %XXX fix, yes it does, sort of.
- %XXX do a paragraph on p2p vs client-server
- \cite{tarzan:ccs02}
- \cite{morphmix:fc04}
- Systems such as Freedom and the original Onion Routing
- build the anonymous channel all at once, using a layered ``onion'' of
- public-key encrypted messages, each layer of which provides a set of session
- keys and the address of the next server in the channel. Tor as described
- herein, Tarzan, Morphmix, Cebolla \cite{cebolla}, and AnonNet
- \cite{anonnet} build the
- channel in stages, extending it one hop at a time. This approach
- makes perfect forward secrecy feasible.
- Distributed-trust anonymizing systems differ in how they prevent attackers
- from controlling too many servers and thus compromising too many user paths.
- Some protocols rely on a centrally maintained set of well-known anonymizing
- servers. The current Tor design falls into this category.
- Others (such as Tarzan and MorphMix) allow unknown users to run
- servers, while using a limited resource (DHT space for Tarzan; IP space for
- MorphMix) to prevent an attacker from owning too much of the network.
- Crowds uses a centralized ``blender'' to enforce Crowd membership
- policy. For small crowds it is suggested that familiarity with all
- members is adequate. For large diverse crowds, limiting accounts in
- control of any one party is more complex:
- ``(e.g., the blender administrator sets up an account for a user only
- after receiving a written, notarized request from that user) and each
- account to one jondo, and by monitoring and limiting the number of
- jondos on any one net- work (using IP address), the attacker would be
- forced to launch jondos using many different identities and on many
- different networks to succeed'' \cite{crowds-tissec}.
- PipeNet \cite{back01, pipenet}, another low-latency design proposed at
- about the same time as the original Onion Routing design, provided
- stronger anonymity at the cost of allowing a single user to shut
- down the network simply by not sending. Low-latency anonymous
- communication has also been designed for other environments, including
- ISDN \cite{isdn-mixes}
- % and mobile applications such as telephones and
- %active badging systems \cite{federrath-ih96,reed-protocols97}.
- Some systems, such as Crowds \cite{crowds-tissec}, do not rely on changing the
- appearance of packets to hide the path; rather they try to prevent an
- intermediary from knowing whether it is talking to an initiator
- or just another intermediary. Crowds uses no public-key
- encryption, but the responder and all data are visible to all
- nodes on the path; so anonymity of the connection initiator depends on
- filtering all identifying information from the data stream. Crowds only
- supports HTTP traffic.
- Hordes \cite{hordes-jcs} is based on Crowds but also uses multicast
- responses to hide the initiator. Herbivore \cite{herbivore} and
- P5 \cite{p5} go even further, requiring broadcast.
- Each uses broadcast in different ways, and trade-offs are made to
- make broadcast more practical. Both Herbivore and P5 are designed primarily
- for communication between communicating peers, although Herbivore
- permits external connections by requesting a peer to serve as a proxy.
- Allowing easy connections to nonparticipating responders or recipients
- is a practical requirement for many users, e.g., to visit
- nonparticipating Web sites or to exchange mail with nonparticipating
- recipients.
- Tor is not primarily designed for censorship resistance but rather
- for anonymous communication. However, Tor's rendezvous points, which
- enable connections between mutually anonymous entities, also
- facilitate connections to hidden servers. These building blocks to
- censorship resistance and other capabilities are described in
- Section~\ref{sec:rendezvous}. Location-hidden servers are an
- essential component for the anonymous publishing systems such as
- Eternity\cite{eternity}, Publius\cite{publius},
- Free Haven\cite{freehaven-berk}, and Tangler\cite{tangler}.
- STILL NOT MENTIONED:
- real-time mixes\\
- rewebbers\\
- cebolla\\
- [XXX Close by mentioning where Tor fits.]
- \Section{Design goals and assumptions}
- \label{sec:assumptions}
- \SubSection{Goals}
- Like other low-latency anonymity designs, Tor seeks to frustrate
- attackers from linking communication partners, or from linking
- multiple communications to or from a single point. Within this
- main goal, however, several design considerations have directed
- Tor's evolution.
- \begin{description}
- \item[Deployability:] The design must be one which can be implemented,
- deployed, and used in the real world. This requirement precludes designs
- that are expensive to run (for example, by requiring more bandwidth than
- volunteers are willing to provide); designs that place a heavy liability
- burden on operators (for example, by allowing attackers to implicate onion
- routers in illegal activities); and designs that are difficult or expensive
- to implement (for example, by requiring kernel patches, or separate proxies
- for every protocol). This requirement also precludes systems in which
- users who do not benefit from anonymity are required to run special
- software in order to communicate with anonymous parties.
- % Our rendezvous points require clients to use our software to get to
- % the location-hidden servers.
- % Or at least, they require somebody near the client-side running our
- % software. We haven't worked out the details of keeping it transparent
- % for Alice if she's using some other http proxy somewhere. I guess the
- % external http proxy should route through a Tor client, which automatically
- % translates the foo.onion address? -RD
- %
- % 1. Such clients do benefit from anonymity: they can reach the server.
- % Recall that our goal for location hidden servers is to continue to
- % provide service to priviliged clients when a DoS is happening or
- % to provide access to a location sensitive service. I see no contradiction.
- % 2. A good idiot check is whether what we require people to download
- % and use is more extreme than downloading the anonymizer toolbar or
- % privacy manager. I don't think so, though I'm not claiming we've already
- % got the installation and running of a client down to that simplicity
- % at this time. -PS
- \item[Usability:] A hard-to-use system has fewer users---and because
- anonymity systems hide users among users, a system with fewer users
- provides less anonymity. Usability is not only a convenience for Tor:
- it is a security requirement \cite{econymics,back01}. Tor
- should work with most of a user's unmodified applications; shouldn't
- introduce prohibitive delays; and should require the user to make as few
- configuration decisions as possible.
- \item[Flexibility:] The protocol must be flexible and
- well-specified, so that it can serve as a test-bed for future research in
- low-latency anonymity systems. Many of the open problems in low-latency
- anonymity networks (such as generating dummy traffic, or preventing
- pseudospoofing attacks) may be solvable independently from the issues
- solved by Tor; it would be beneficial if future systems were not forced to
- reinvent Tor's design decisions. (But note that while a flexible design
- benefits researchers, there is a danger that differing choices of
- extensions will render users distinguishable. Thus, experiments
- on extensions should be limited and should not significantly affect
- the distinguishability of ordinary users.
- % To run an experiment researchers must file an
- % anonymity impact statement -PS
- of implementations should
- not permit different protocol extensions to coexist in a single deployed
- network.)
- \item[Conservative design:] The protocol's design and security parameters
- must be conservative. Because additional features impose implementation
- and complexity costs, Tor should include as few speculative features as
- possible. (We do not oppose speculative designs in general; however, it is
- our goal with Tor to embody a solution to the problems in low-latency
- anonymity that we can solve today before we plunge into the problems of
- tomorrow.)
- % This last bit sounds completely cheesy. Somebody should tone it down. -NM
- \end{description}
- \SubSection{Non-goals}
- \label{subsec:non-goals}
- In favoring conservative, deployable designs, we have explicitly deferred
- a number of goals. Many of these goals are desirable in anonymity systems,
- but we choose to defer them either because they are solved elsewhere,
- or because they present an area of active research lacking a generally
- accepted solution.
- \begin{description}
- \item[Not Peer-to-peer:] Tarzan and Morphmix aim to
- scale to completely decentralized peer-to-peer environments with thousands
- of short-lived servers, many of which may be controlled by an adversary.
- Because of the many open problems in this approach, Tor uses a more
- conservative design.
- \item[Not secure against end-to-end attacks:] Tor does not claim to provide a
- definitive solution to end-to-end timing or intersection attacks. Some
- approaches, such as running an onion router, may help; see
- Section~\ref{sec:analysis} for more discussion.
- \item[No protocol normalization:] Tor does not provide \emph{protocol
- normalization} like Privoxy or the Anonymizer. In order to make clients
- indistinguishable when they use complex and variable protocols such as HTTP,
- Tor must be layered with a filtering proxy such as Privoxy to hide
- differences between clients, expunge protocol features that leak identity,
- and so on. Similarly, Tor does not currently integrate tunneling for
- non-stream-based protocols like UDP; this too must be provided by
- an external service.
- % Actually, tunneling udp over tcp is probably horrible for some apps.
- % Should this get its own non-goal bulletpoint? The motivation for
- % non-goal-ness would be burden on clients / portability.
- \item[Not steganographic:] Tor does not try to conceal which users are
- sending or receiving communications; it only tries to conceal whom they are
- communicating with.
- \end{description}
- \SubSection{Threat Model}
- \label{subsec:threat-model}
- A global passive adversary is the most commonly assumed threat when
- analyzing theoretical anonymity designs. But like all practical low-latency
- systems, Tor is not secure against this adversary. Instead, we assume an
- adversary that is weaker than global with respect to distribution, but that
- is not merely passive. Our threat model expands on that from
- \cite{or-pet00}.
- %%%% This is really keen analytical stuff, but it isn't our threat model:
- %%%% we just go ahead and assume a fraction of hostile nodes for
- %%%% convenience. -NM
- %
- %% The basic adversary components we consider are:
- %% \begin{description}
- %% \item[Observer:] can observe a connection (e.g., a sniffer on an
- %% Internet router), but cannot initiate connections. Observations may
- %% include timing and/or volume of packets as well as appearance of
- %% individual packets (including headers and content).
- %% \item[Disrupter:] can delay (indefinitely) or corrupt traffic on a
- %% link. Can change all those things that an observer can observe up to
- %% the limits of computational ability (e.g., cannot forge signatures
- %% unless a key is compromised).
- %% \item[Hostile initiator:] can initiate (or destroy) connections with
- %% specific routes as well as vary the timing and content of traffic
- %% on the connections it creates. A special case of the disrupter with
- %% additional abilities appropriate to its role in forming connections.
- %% \item[Hostile responder:] can vary the traffic on the connections made
- %% to it including refusing them entirely, intentionally modifying what
- %% it sends and at what rate, and selectively closing them. Also a
- %% special case of the disrupter.
- %% \item[Key breaker:] can break the key used to encrypt connection
- %% initiation requests sent to a Tor-node.
- %% % Er, there are no long-term private decryption keys. They have
- %% % long-term private signing keys, and medium-term onion (decryption)
- %% % keys. Plus short-term link keys. Should we lump them together or
- %% % separate them out? -RD
- %% %
- %% % Hmmm, I was talking about the keys used to encrypt the onion skin
- %% % that contains the public DH key from the initiator. Is that what you
- %% % mean by medium-term onion key? (``Onion key'' used to mean the
- %% % session keys distributed in the onion, back when there were onions.)
- %% % Also, why are link keys short-term? By link keys I assume you mean
- %% % keys that neighbor nodes use to superencrypt all the stuff they send
- %% % to each other on a link. Did you mean the session keys? I had been
- %% % calling session keys short-term and everything else long-term. I
- %% % know I was being sloppy. (I _have_ written papers formalizing
- %% % concepts of relative freshness.) But, there's some questions lurking
- %% % here. First up, I don't see why the onion-skin encryption key should
- %% % be any shorter term than the signature key in terms of threat
- %% % resistance. I understand that how we update onion-skin encryption
- %% % keys makes them depend on the signature keys. But, this is not the
- %% % basis on which we should be deciding about key rotation. Another
- %% % question is whether we want to bother with someone who breaks a
- %% % signature key as a particular adversary. He should be able to do
- %% % nearly the same as a compromised tor-node, although they're not the
- %% % same. I reworded above, I'm thinking we should leave other concerns
- %% % for later. -PS
- %% \item[Hostile Tor node:] can arbitrarily manipulate the
- %% connections under its control, as well as creating new connections
- %% (that pass through itself).
- %% \end{description}
- %
- %% All feasible adversaries can be composed out of these basic
- %% adversaries. This includes combinations such as one or more
- %% compromised Tor-nodes cooperating with disrupters of links on which
- %% those nodes are not adjacent, or such as combinations of hostile
- %% outsiders and link observers (who watch links between adjacent
- %% Tor-nodes). Note that one type of observer might be a Tor-node. This
- %% is sometimes called an honest-but-curious adversary. While an observer
- %% Tor-node will perform only correct protocol interactions, it might
- %% share information about connections and cannot be assumed to destroy
- %% session keys at end of a session. Note that a compromised Tor-node is
- %% stronger than any other adversary component in the sense that
- %% replacing a component of any adversary with a compromised Tor-node
- %% results in a stronger overall adversary (assuming that the compromised
- %% Tor-node retains the same signature keys and other private
- %% state-information as the component it replaces).
- First, we assume that a threshold of directory servers are honest,
- reliable, accurate, and trustworthy.
- %% the rest of this isn't needed, if dirservers do threshold concensus dirs
- % To augment this, users can periodically cross-check
- %directories from each directory server (trust, but verify).
- %, and that they always have access to at least one directory server that they trust.
- Second, we assume that somewhere between ten percent and twenty
- percent\footnote{In some circumstances---for example, if the Tor network is
- running on a hardened network where all operators have had background
- checks---the number of compromised nodes could be much lower.}
- of the Tor nodes accepted by the directory servers are compromised, hostile,
- and collaborating in an off-line clique. These compromised nodes can
- arbitrarily manipulate the connections that pass through them, as well as
- creating new connections that pass through themselves. They can observe
- traffic, and record it for later analysis. Honest participants do not know
- which servers these are.
- (In reality, many realistic adversaries might have `bad' servers that are not
- fully compromised but simply under observation, or that have had their keys
- compromised. But for the sake of analysis, we ignore, this possibility,
- since the threat model we assume is strictly stronger.)
- % This next paragraph is also more about analysis than it is about our
- % threat model. Perhaps we can say, ``users can connect to the network and
- % use it in any way; we consider abusive attacks separately.'' ? -NM
- Third, we constrain the impact of hostile users. Users are assumed to vary
- widely in both the duration and number of times they are connected to the Tor
- network. They can also be assumed to vary widely in the volume and shape of
- the traffic they send and receive. Hostile users are, by definition, limited
- to creating and varying their own connections into or through a Tor
- network. They may attack their own connections to try to gain identity
- information of the responder in a rendezvous connection. They can also try to
- attack sites through the Onion Routing network; however we will consider this
- abuse rather than an attack per se (see
- Section~\ref{subsec:exitpolicies}). Other than abuse, a hostile user's
- motivation to attack his own connections is limited to the network effects of
- such actions, such as denial of service (DoS) attacks. Thus, in this case,
- we can view user as simply an extreme case of the ordinary user; although
- ordinary users are not likely to engage in, e.g., IP spoofing, to gain their
- objectives.
- In general, we are more focused on traffic analysis attacks than
- traffic confirmation attacks.
- %A user who runs a Tor proxy on his own
- %machine, connects to some remote Tor-node and makes a connection to an
- %open Internet site, such as a public web server, is vulnerable to
- %traffic confirmation.
- That is, an active attacker who suspects that
- a particular client is communicating with a particular server can
- confirm this if she can modify and observe both the
- connection between the Tor network and the client and that between the
- Tor network and the server. Even a purely passive attacker can
- confirm traffic if the timing and volume properties of the traffic on
- the connection are unique enough. (This is not to say that Tor offers
- no resistance to traffic confirmation; it does. We defer discussion
- of this point and of particular attacks until Section~\ref{sec:attacks},
- after we have described Tor in more detail.)
- % XXX We need to say what traffic analysis is: How about...
- On the other hand, we {\it do} try to prevent an attacker from
- performing traffic analysis: that is, attempting to learn the communication
- partners of an arbitrary user.
- % XXX If that's not right, what is? It would be silly to have a
- % threat model section without saying what we want to prevent the
- % attacker from doing. -NM
- % XXX Also, do we want to mention linkability or building profiles? -NM
- Our assumptions about our adversary's capabilities imply a number of
- possible attacks against users' anonymity. Our adversary might try to
- mount passive attacks by observing the edges of the network and
- correlating traffic entering and leaving the network: either because
- of relationships in packet timing; relationships in the volume of data
- sent; [XXX simple observation??]; or relationships in any externally
- visible user-selected options. The adversary can also mount active
- attacks by trying to compromise all the servers' keys in a
- path---either through illegitimate means or through legal coercion in
- unfriendly jurisdiction; by selectively DoSing trustworthy servers; by
- introducing patterns into entering traffic that can later be detected;
- or by modifying data entering the network and hoping that trashed data
- comes out the other end. The attacker can additionally try to
- decrease the network's reliability by performing antisocial activities
- from reliable servers and trying to get them taken down.
- % XXX Should there be more or less? Should we turn this into a
- % bulleted list? Should we cut it entirely?
- We consider these attacks and more, and describe our defenses against them
- in Section~\ref{sec:attacks}.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{The Tor Design}
- \label{sec:design}
- The Tor network is an overlay network; each node is called an onion router
- (OR). Onion routers run as normal user-level processes without needing
- any special
- privileges. Currently, each OR maintains a long-term TLS \cite{TLS}
- connection to every other
- OR. (We examine some ways to relax this clique-topology assumption in
- Section~\ref{subsec:restricted-routes}.) A subset of the ORs also act as
- directory servers, tracking which routers are currently in the network;
- see Section~\ref{subsec:dirservers} for directory server details. Users
- run local software called an onion proxy (OP) to fetch directories,
- establish paths (called \emph{virtual circuits}) across the network,
- and handle connections from user applications. Onion proxies accept
- TCP streams and multiplex them across the virtual circuit. The onion
- router on the other side
- % I don't mean other side, I mean wherever it is on the circuit. But
- % don't want to introduce complexity this early? Hm. -RD
- of the circuit connects to the destinations of
- the TCP streams and relays data.
- Each onion router uses three public keys: a long-term identity key, a
- short-term onion key, and a short-term link key. The identity
- (signing) key is used to sign TLS certificates, to sign its router
- descriptor (a summary of its keys, address, bandwidth, exit policy,
- etc), and to sign directories if it is a directory server. Changing
- the identity key of a router is considered equivalent to creating a
- new router. The onion (decryption) key is used for decrypting requests
- from users to set up a circuit and negotiate ephemeral keys. Finally,
- link keys are used by the TLS protocol when communicating between
- onion routers. We discuss rotating these keys in
- Section~\ref{subsec:rotating-keys}.
- Section~\ref{subsec:cells} discusses the structure of the fixed-size
- \emph{cells} that are the unit of communication in Tor. We describe
- in Section~\ref{subsec:circuits} how virtual circuits are
- built, extended, truncated, and destroyed. Section~\ref{subsec:tcp}
- describes how TCP streams are routed through the network, and finally
- Section~\ref{subsec:congestion} talks about congestion control and
- fairness issues.
- \SubSection{Cells}
- \label{subsec:cells}
- % I think we should describe connections before cells. -NM
- Traffic passes from one OR to another, or between a user's OP and an OR,
- in fixed-size cells. Each cell is 256
- bytes, and consists of a header and a payload. The header includes an
- anonymous circuit identifier (ACI) that specifies which circuit the
- % Should we replace ACI with circID ? What is this 'anonymous circuit'
- % thing anyway? -RD
- cell refers to
- (many circuits can be multiplexed over the single TCP connection between
- ORs or between an OP and an OR), and a command to describe what to do
- with the cell's payload. Cells are either \emph{control} cells, which are
- interpreted by the node that receives them, or \emph{relay} cells,
- which carry end-to-end stream data. Controls cells can be one of:
- \emph{padding} (currently used for keepalive, but also usable for link
- padding); \emph{create} or \emph{created} (used to set up a new circuit);
- or \emph{destroy} (to tear down a circuit).
- % We need to say that ACIs are connection-specific: each circuit has
- % a different ACI along each connection. -NM
- % agreed -RD
- Relay cells have an additional header (the relay header) after the
- cell header, containing the stream identifier (many streams can
- be multiplexed over a circuit); an end-to-end checksum for integrity
- checking; the length of the relay payload; and a relay command. Relay
- commands can be one of: \emph{relay
- data} (for data flowing down the stream), \emph{relay begin} (to open a
- stream), \emph{relay end} (to close a stream), \emph{relay connected}
- (to notify the OP that a relay begin has succeeded), \emph{relay
- extend} and \emph{relay extended} (to extend the circuit by a hop,
- and to acknowledge), \emph{relay truncate} and \emph{relay truncated}
- (to tear down only part of the circuit, and to acknowledge), \emph{relay
- sendme} (used for congestion control), and \emph{relay drop} (used to
- implement long-range dummies).
- We describe each of these cell types in more detail below.
- % Nick: should there have been a table here? -RD
- % Maybe. -NM
- \SubSection{Circuits and streams}
- \label{subsec:circuits}
- % I think when we say ``the user,'' maybe we should say ``the user's OP.''
- The original Onion Routing design built one circuit for each
- TCP stream. Because building a circuit can take several tenths of a
- second (due to public-key cryptography delays and network latency),
- this design imposed high costs on applications like web browsing that
- open many TCP streams.
- In Tor, each circuit can be shared by many TCP streams. To avoid
- delays, users construct circuits preemptively. To limit linkability
- among the streams, users rotate connections by building a new circuit
- periodically (currently every minute) if the previous one has been
- used, and expire old used circuits that are no longer in use. Thus
- even heavy users spend a negligible amount of time and CPU in
- building circuits, but only a limited number of requests can be linked
- to each other by a given exit node. Also, because circuits are built
- in the background, failed routers do not affects user experience.
- \subsubsection{Constructing a circuit}
- Users construct each incrementally, negotiating a symmetric key with
- each hop one at a time. To begin creating a new circuit, the user
- (call her Alice) sends a \emph{create} cell to the first node in her
- chosen path. The cell's payload is the first half of the
- Diffie-Hellman handshake, encrypted to the onion key of the OR (call
- him Bob). Bob responds with a \emph{created} cell containg the second
- half of the DH handshake, along with a hash of the negotiated key
- $K=g^{xy}$. This protocol tries to achieve unilateral entity
- authentication (Alice knows she's handshaking with Bob, Bob doesn't
- care who is opening the circuit---Alice has no key and is trying to
- remain anonymous); unilateral key authentication (Alice and Bob
- agree on a key, and Alice knows Bob is the only other person who could
- know it). We also want perfect forward
- secrecy, key freshness, etc.
- \begin{equation}
- \begin{aligned}
- \mathrm{Alice} \rightarrow \mathrm{Bob}&: E_{PK_{Bob}}(g^x) \\
- \mathrm{Bob} \rightarrow \mathrm{Alice}&: g^y, H(K | \mathrm{``handshake"}) \\
- \end{aligned}
- \end{equation}
- The second step shows both that it was Bob
- who received $g^x$, and that it was Bob who came up with $y$. We use
- PK encryption in the first step (rather than, e.g., using the first two
- steps of STS, which has a signature in the second step) because we
- don't have enough room in a single cell for a public key and also a
- signature. Preliminary analysis with the NRL protocol analyzer shows
- the above protocol to be secure (including providing PFS) under the
- traditional Dolev-Yao model.
- % cite Cathy? -RD
- % did I use the buzzwords correctly? -RD
- % Hm. I think that this paragraph could go earlier in expository
- % order: we describe how to build whole circuit, then explain the
- % protocol in more detail. -NM
- To extend a circuit past the first hop, Alice sends a \emph{relay extend}
- cell to the last node in the circuit, specifying the address of the new
- OR and an encrypted $g^x$ for it. That node copies the half-handshake
- into a \emph{create} cell, and passes it to the new OR to extend the
- circuit. When it responds with a \emph{created} cell, the penultimate OR
- copies the payload into a \emph{relay extended} cell and passes it back.
- % Nick: please fix my "that OR" pronouns -RD
- \subsubsection{Relay cells}
- Once Alice has established the circuit (so she shares a key with each
- OR on the circuit), she can send relay cells.
- The stream ID in the relay header indicates to which stream the cell belongs.
- A relay cell can be addressed to any of the ORs on the circuit. To
- construct a relay cell addressed to a given OR, Alice iteratively
- encrypts the cell payload (that is, the relay header and payload)
- with the symmetric key of each hop up to that OR. Then, at each hop
- down the circuit, the OR decrypts the cell payload and checks whether
- it recognizes the stream ID. A stream ID is recognized either if it
- is an already open stream at that OR, or if it is equal to zero. The
- zero stream ID is treated specially, and is used for control messages,
- e.g. starting a new stream. If the stream ID is unrecognized, the OR
- passes the relay cell downstream. This \emph{leaky pipe} circuit topology
- allows Alice's streams to exit at different ORs on a single circuit.
- Alice may choose different exit points because of their exit policies,
- or to keep the ORs from knowing that two streams
- originate at the same person.
- To tear down a circuit, Alice sends a destroy control cell. Each OR
- in the circuit receives the destroy cell, closes all open streams on
- that circuit, and passes a new destroy cell forward. But since circuits
- can be built incrementally, they can also be torn down incrementally:
- Alice can instead send a relay truncate cell to a node along the circuit. That
- node will send a destroy cell forward, and reply with an acknowledgment
- (relay truncated). Alice might truncate her circuit so she can extend it
- to different nodes without signaling to the first few nodes (or somebody
- observing them) that she is changing her circuit. That is, nodes in the
- middle are not even aware that the circuit was truncated, because the
- relay cells are encrypted. Similarly, if a node on the circuit goes down,
- the adjacent node can send a relay truncated back to Alice. Thus the
- ``break a node and see which circuits go down'' attack is weakened.
- \SubSection{Opening and closing streams}
- \label{subsec:tcp}
- When Alice's application wants to open a TCP connection to a given
- address and port, it asks the OP (via SOCKS) to make the connection. The
- OP chooses the newest open circuit (or creates one if none is available),
- chooses a suitable OR on that circuit to be the exit node (usually the
- last node, but maybe others due to exit policy conflicts; see
- Section~\ref{sec:exit-policies}), chooses a new random stream ID for
- this stream,
- and delivers a relay begin cell to that exit node. It uses a stream ID
- of zero for the begin cell (so the OR will recognize it), and the relay
- payload lists the new stream ID and the destination address and port.
- Once the exit node completes the connection to the remote host, it
- responds with a relay connected cell through the circuit. Upon receipt,
- the OP notifies the application that it can begin talking.
- There's a catch to using SOCKS, though -- some applications hand the
- alphanumeric address to the proxy, while others resolve it into an IP
- address first and then hand the IP to the proxy. When the application
- does the DNS resolution first, Alice broadcasts her destination. Common
- applications like Mozilla and ssh have this flaw.
- In the case of Mozilla, we're fine: the filtering web proxy called Privoxy
- does the SOCKS call safely, and Mozilla talks to Privoxy safely. But a
- portable general solution, such as for ssh, is an open problem. We could
- modify the local nameserver, but this approach is invasive, brittle, and
- not portable. We could encourage the resolver library to do resolution
- via TCP rather than UDP, but this approach is hard to do right, and also
- has portability problems. Our current answer is to encourage the use of
- privacy-aware proxies like Privoxy wherever possible, and also provide
- a tool similar to \emph{dig} that can do a private lookup through the
- Tor network.
- Ending a Tor stream is analogous to ending a TCP stream: it uses a
- two-step handshake for normal operation, or a one-step handshake for
- errors. If one side of the stream closes abnormally, that node simply
- sends a relay teardown cell, and tears down the stream. If one side
- % Nick: mention relay teardown in 'cell' subsec? good enough name? -RD
- of the stream closes the connection normally, that node sends a relay
- end cell down the circuit. When the other side has sent back its own
- relay end, the stream can be torn down. This two-step handshake allows
- for TCP-based applications that, for example, close a socket for writing
- but are still willing to read.
- \SubSection{Integrity checking on streams}
- In the old Onion Routing design, traffic was vulnerable to a
- malleability attack: an attacker could make changes to an encrypted
- cell to create corresponding changes to the data leaving the network.
- (Even an external adversary could do this, despite link encryption!)
- This weakness allowed an adversary to change a create cell to a destroy
- cell; change the destination address in a relay begin cell to the
- adversary's webserver; or change a user on an ftp connection from
- typing ``dir'' to typing ``delete *''. Any node or observer along the
- path could introduce such corruption in a stream.
- Tor prevents external adversaries by mounting this attack simply by
- using TLS. Addressing the insider malleability attack, however, is
- more complex.
- Rather than doing integrity checking of the relay cells at each hop,
- which would increase packet size
- by a function of path length\footnote{This is also the argument against
- using recent cipher modes like EAX \cite{eax} --- we don't want the added
- message-expansion overhead at each hop, and we don't want to leak the path
- length (or pad to some max path length).}, we choose to
- % accept passive timing attacks,
- % (How? I don't get it. Do we mean end-to-end traffic
- % confirmation attacks? -NM)
- and perform integrity
- checking only at the edges of the circuit. When Alice negotiates a key
- with the exit hop, they both start a SHA-1 with some derivative of that key,
- thus starting out with randomness that only the two of them know. From
- then on they each incrementally add all the data bytes flowing across
- the stream to the SHA-1, and each relay cell includes the first 4 bytes
- of the current value of the hash.
- The attacker must be able to guess all previous bytes between Alice
- and Bob on that circuit (including the pseudorandomness from the key
- negotiation), plus the bytes in the current cell, to remove or modify the
- cell. Attacks on SHA-1 where the adversary can incrementally add to a
- hash to produce a new valid hash don't work,
- because all hashes are end-to-end encrypted across the circuit.
- The computational overhead isn't so bad, compared to doing an AES
- % XXX We never say we use AES. Say it somewhere above? -RD
- crypt at each hop in the circuit. We use only four bytes per cell to
- minimize overhead; the chance that an adversary will correctly guess a
- valid hash, plus the payload the current cell, is acceptly low, given
- that Alice or Bob tear down the circuit if they receive a bad hash.
- \SubSection{Rate limiting and fairness}
- Volunteers are generally more willing to run services that can limit
- their bandwidth usage. To accomodate them, Tor servers use a token
- bucket approach to limit the number of bytes they
- receive. Tokens are added to the bucket each second (when the bucket is
- full, new tokens are discarded.) Each token represents permission to
- receive one byte from the network --- to receive a byte, the connection
- must remove a token from the bucket. Thus if the bucket is empty, that
- connection must wait until more tokens arrive. The number of tokens we
- add enforces a long-term average rate of incoming bytes, while still
- permitting short-term bursts above the allowed bandwidth. Current bucket
- sizes are set to ten seconds worth of traffic.
- Further, we want to avoid starving any Tor streams. Entire circuits
- could starve if we read greedily from connections and one connection
- uses all the remaining bandwidth. We solve this by dividing the number
- of tokens in the bucket by the number of connections that want to read,
- and reading at most that number of bytes from each connection. We iterate
- this procedure until the number of tokens in the bucket is under some
- threshold (eg 10KB), at which point we greedily read from connections.
- Because the Tor protocol generates roughly the same number of outgoing
- bytes as incoming bytes, it is sufficient in practice to rate-limit
- incoming bytes.
- % Is it? Fun attack: I send you lots of 1-byte-at-a-time TCP frames.
- % In response, you send lots of 256 byte cells. Can I use this to
- % make you exceed your outgoing bandwidth limit by a factor of 256? -NM
- % Can we resolve this by, when reading from edge connections, rounding up
- % the bytes read (wrt buckets) to the nearest multiple of 256? -RD
- Further, inspired by Rennhard et al's design in \cite{anonnet}, a
- circuit's edges heuristically distinguish interactive streams from bulk
- streams by comparing the frequency with which they supply cells. We can
- provide good latency for interactive streams by giving them preferential
- service, while still getting good overall throughput to the bulk
- streams. Such preferential treatment presents a possible end-to-end
- attack, but an adversary who can observe both
- ends of the stream can already learn this information through timing
- attacks.
- \SubSection{Congestion control}
- \label{subsec:congestion}
- Even with bandwidth rate limiting, we still need to worry about
- congestion, either accidental or intentional. If enough users choose the
- same OR-to-OR connection for their circuits, that connection can become
- saturated. For example, an adversary could make a large HTTP PUT request
- through the onion routing network to a webserver he runs, and then
- refuse to read any of the bytes at the webserver end of the
- circuit. Without some congestion control mechanism, these bottlenecks
- can propagate back through the entire network. We describe our
- responses below.
- \subsubsection{Circuit-level}
- To control a circuit's bandwidth usage, each OR keeps track of two
- windows. The \emph{package window} tracks how many relay data cells the OR is
- allowed to package (from outside streams) for transmission back to the OP,
- and the \emph{deliver window} tracks how many relay data cells it is willing
- to deliver to streams outside the network. Each window is initialized
- (say, to 1000 data cells). When a data cell is packaged or delivered,
- the appropriate window is decremented. When an OR has received enough
- data cells (currently 100), it sends a relay sendme cell towards the OP,
- with stream ID zero. When an OR receives a relay sendme cell with stream
- ID zero, it increments its packaging window. Either of these cells
- increments the corresponding window by 100. If the packaging window
- reaches 0, the OR stops reading from TCP connections for all streams
- on the corresponding circuit, and sends no more relay data cells until
- receiving a relay sendme cell.
- The OP behaves identically, except that it must track a packaging window
- and a delivery window for every OR in the circuit. If a packaging window
- reaches 0, it stops reading from streams destined for that OR.
- \subsubsection{Stream-level}
- The stream-level congestion control mechanism is similar to the
- circuit-level mechanism above. ORs and OPs use relay sendme cells
- to implement end-to-end flow control for individual streams across
- circuits. Each stream begins with a package window (e.g. 500 cells),
- and increments the window by a fixed value (50) upon receiving a relay
- sendme cell. Rather than always returning a relay sendme cell as soon
- as enough cells have arrived, the stream-level congestion control also
- has to check whether data has been successfully flushed onto the TCP
- stream; it sends a relay sendme only when the number of bytes pending
- to be flushed is under some threshold (currently 10 cells worth).
- Currently, non-data relay cells do not affect the windows. Thus we
- avoid potential deadlock issues, e.g. because a stream can't send a
- relay sendme cell because its packaging window is empty.
- \subsubsection{Needs more research}
- We don't need to reimplement full TCP windows (with sequence numbers,
- the ability to drop cells when we're full and retransmit later, etc),
- because the TCP streams already guarantee in-order delivery of each
- cell. But we need to investigate further the effects of the current
- parameters on throughput and latency, while also keeping privacy in mind;
- see Section~\ref{sec:maintaining-anonymity} for more discussion.
- \Section{Other design decisions}
- \SubSection{Resource management and DoS prevention}
- \label{subsec:dos}
- Providing Tor as a public service provides many opportunities for an
- attacker to mount denial-of-service attacks against the network. While
- flow control and rate limiting (discussed in
- section~\ref{subsec:congestion}) prevents users from consuming more
- bandwidth than nodes are willing to provide, opportunities remain for
- consume more network resources than their fair share, or to render the
- network unusable for other users.
- First of all, there are a number of CPU-consuming denial-of-service
- attacks wherein an attacker can force an OR to perform expensive
- cryptographic operations. For example, an attacker who sends a
- \emph{create} cell full of junk bytes can force an OR to perform an RSA
- decrypt its half of the Diffie-Helman handshake. Similarly, an attacker
- fake the start of a TLS handshake, forcing the OR to carry out its
- (comparatively expensive) half of the handshake at no real computational
- cost to the attacker.
- To address these attacks, several approaches exist. First, ORs may
- demand proof-of-computation tokens \cite{hashcash} before beginning new
- TLS handshakes or accepting \emph{create} cells. So long as these
- tokens are easy to verify and computationally expensive to produce, this
- approach limits the DoS attack multiplier. Additionally, ORs may limit
- the rate at which they accept create cells and TLS connections, so that
- the computational work of doing so does not drown out the (comparatively
- inexpensive) work of symmetric cryptography needed to keep users'
- packets flowing. This rate limiting could, however, allows an attacker
- to slow down other users as they build new circuits.
- % What about link-to-link rate limiting?
- % This paragraph needs more references.
- More worrisome are distributed denial of service attacks wherein an
- attacker uses a large number of compromised hosts throughout the network
- to consume the Tor network's resources. Although these attacks are not
- new to the networking literature, some proposed approaches are a poor
- fit to anonymous networks. For example, solutions based on backtracking
- harmful traffic present a significant risk that an anonymity-breaking
- adversary could exploit the backtracking mechanism to compromise users'
- anonymity. [XXX So, what should we say here? -NM]
- % Now would be a good point to talk about twins. What the do, what
- % they can't.
- Attackers also have an opportunity to attack the Tor network by mounting
- attacks on the hosts and network links running it. If an attacker can
- successfully disrupt a single circuit or link along a virtual circuit,
- all currently open streams passing along that part of the circuit
- become unrecoverable, and are closed. The current Tor design treats
- such attacks as intermittent network failures, and depends on users and
- applications to respond or recover as appropriate. A possible future
- design could use an end-to-end based TCP-like acknowledgment protocol,
- so that no streams are lost unless the entry or exit point themselves
- are disrupted. This solution would require more buffering at exits,
- however, and its network properties still need to be investigated. [XXX
- That sounds really evasive. We should say more.]
- %[XXX Mention that OR-to-OR connections should be highly reliable
- % (whatever that means). If they aren't, everything can stall.]
- %=====================
- % This stuff should go elsewhere. Probably section 2.
- Channel-based anonymity designs must choose which protocol layer to
- anonymize. They may choose to intercept IP packets directly, and relay
- them whole (stripping the source address) as the contents of their
- anonymous channels \cite{tarzan:ccs02,freedom2-arch}. Alternatively,
- they may
- accept TCP streams and relay the data in those streams along the
- channel, ignoring the breakdown of that data into TCP frames. (Tor
- takes this approach, as does Rennhard's anonymity network \cite{anonnet}
- and Morphmix \cite{morphmix:fc04}.) Finally, they may accept
- application-level protocols (such as HTTP) and relay the application
- requests themselves along their anonymous channels.
- This protocol-layer decision represents a compromise between flexibility
- and anonymity. For example, a system that understands HTTP can strip
- identifying information from those requests; can take advantage of
- caching to limit the number of requests that leave the network; and can
- batch or encode those requests in order to minimize the number of
- connections. On the other hand, an IP-level anonymizer can handle
- nearly any protocol, even ones unforeseen by their designers. TCP-level
- anonymity networks like Tor present a middle approach: they are fairly
- application neutral (so long as the application supports, or can be
- tunneled across, TCP), but by treating application connections as data
- streams rather than raw TCP packets, they avoid the well-known
- inefficiencies of tunneling TCP over TCP \cite{tcp-over-tcp-is-bad}.
- % Is there a better tcp-over-tcp-is-bad reference?
- %Also mention that weirdo IP trickery requires kernel patches to most
- %operating systems? -NM
- \SubSection{Exit policies and abuse}
- \label{subsec:exitpolicies}
- Exit abuse is a serious barrier to wide-scale Tor deployment --- we
- must block or limit attacks and other abuse that users can do through
- the Tor network.
- Each onion router's \emph{exit policy} describes to which external
- addresses and ports the router will permit stream connections. On one end
- of the spectrum are \emph{open exit} nodes that will connect anywhere;
- on the other end are \emph{middleman} nodes that only relay traffic to
- other Tor nodes, and \emph{private exit} nodes that only connect locally
- or to addresses internal to that node's organization.
- This private exit
- node configuration is more secure for clients --- the adversary cannot
- see plaintext traffic leaving the network (e.g. to a webserver), so he
- is less sure of Alice's destination. More generally, nodes can require
- a variety of forms of traffic authentication \cite{or-discex00}.
- Most onnion routers will function as \emph{limited exits} that permit
- connections to the world at large, but restrict access to certain abuse-prone
- addresses and services.
- Tor offers more reliability than the high-latency fire-and-forget
- anonymous email networks, because the sender opens a TCP stream
- with the remote mail server and receives an explicit confirmation of
- acceptance. But ironically, the private exit node model works poorly for
- email, when Tor nodes are run on volunteer machines that also do other
- things, because it's quite hard to configure mail transport agents so
- normal users can send mail normally, but the Tor process can only deliver
- mail locally. Further, most organizations have specific hosts that will
- deliver mail on behalf of certain IP ranges; Tor operators must be aware
- of these hosts and consider putting them in the Tor exit policy.
- The abuse issues on closed (e.g. military) networks are different
- from the abuse on open networks like the Internet. While these IP-based
- access controls are still commonplace on the Internet, on closed networks,
- nearly all participants will be honest, and end-to-end authentication
- can be assumed for anything important.
- Tor is harder than minion because tcp doesn't include an abuse
- address. you could reach inside the http stream and change the agent
- or something, but that's a specific case and probably won't help
- much anyway.
- And volunteer nodes don't resolve to anonymizer.mit.edu so it never
- even occurs to people that it wasn't you.
- Preventing abuse of open exit nodes is an unsolved problem. Princeton's
- CoDeeN project \cite{darkside} gives us a glimpse of what we're in for.
- % This is more speculative than a description of our design.
- but their solutions, which mainly involve rate limiting and blacklisting
- nodes which do bad things, don't translate directly to Tor. Rate limiting
- still works great, but Tor intentionally separates sender from recipient,
- so it's hard to know which sender was the one who did the bad thing,
- without just making the whole network wide open.
- even limiting most nodes to allow http, ssh, and aim to exit and reject
- all other stuff is sketchy, because plenty of abuse can happen over
- port 80. but it's a surprisingly good start, because it blocks most things,
- and because people are more used to the concept of port 80 abuse not
- coming from the machine's owner.
- we could also run intrusion detection system (IDS) modules at each tor
- node, to dynamically monitor traffic streams for attack signatures. it
- can even react when it sees a signature by closing the stream. but IDS's
- don't actually work most of the time, and besides, how do you write a
- signature for "is sending a mean mail"?
- we should run a squid at each exit node, to provide comparable anonymity
- to private exit nodes for cache hits, to speed everything up, and to
- have a buffer for funny stuff coming out of port 80. we could similarly
- have other exit proxies for other protocols, like mail, to check
- delivered mail for being spam.
- [XXX Um, I'm uncomfortable with this for several reasons.
- It's not good for keeping honest nodes honest about discarding
- state after it's no longer needed. Granted it keeps an external
- observer from noticing how often sites are visited, but it also
- allows fishing expeditions. ``We noticed you went to this prohibited
- site an hour ago. Kindly turn over your caches to the authorities.''
- I previously elsewhere suggested bulk transfer proxies to carve
- up big things so that they could be downloaded in less noticeable
- pieces over several normal looking connections. We could suggest
- similarly one or a handful of squid nodes that might serve up
- some of the more sensitive but common material, especially if
- the relevant sites didn't want to or couldn't run their own OR.
- This would be better than having everyone run a squid which would
- just help identify after the fact the different history of that
- node's activity. All this kind of speculation needs to move to
- future work section I guess. -PS]
- A mixture of open and restricted exit nodes will allow the most
- flexibility for volunteers running servers. But while a large number
- of middleman nodes is useful to provide a large and robust network,
- a small number of exit nodes still simplifies traffic analysis because
- there are fewer nodes the adversary needs to monitor, and also puts a
- greater burden on the exit nodes.
- The JAP cascade model is really nice because they only need one node to
- take the heat per cascade. On the other hand, a hydra scheme could work
- better (it's still hard to watch all the clients).
- Discuss importance of public perception, and how abuse affects it.
- ``Usability is a security parameter''. ``Public Perception is also a
- security parameter.''
- Discuss smear attacks.
- \SubSection{Directory Servers}
- \label{subsec:dirservers}
- First-generation Onion Routing designs \cite{or-jsac98,freedom2-arch} did
- % is or-jsac98 the right cite here? what's our stock OR cite? -RD
- in-band network status updates: each router flooded a signed statement
- to its neighbors, which propagated it onward. But anonymizing networks
- have different security goals than typical link-state routing protocols.
- For example, we worry more about delays (accidental or intentional)
- that can cause different parts of the network to have different pictures
- of link-state and topology. We also worry about attacks to deceive a
- client about the router membership list, topology, or current network
- state. Such \emph{partitioning attacks} on client knowledge help an
- adversary with limited resources to efficiently deploy those resources
- when attacking a target.
- Instead, Tor uses a small group of redundant directory servers to
- track network topology and node state such as current keys and exit
- policies. The directory servers are normal onion routers, but there are
- only a few of them and they are more trusted. They listen on a separate
- port as an HTTP server, both so participants can fetch current network
- state and router lists (a \emph{directory}), and so other onion routers
- can upload their router descriptors.
- [[mention that descriptors are signed with long-term keys; ORs publish
- regularly to dirservers; policies for generating directories; key
- rotation (link, onion, identity); Everybody already know directory
- keys; how to approve new nodes (advogato, sybil, captcha (RTT));
- policy for handling connections with unknown ORs; diff-based
- retrieval; diff-based consensus; separate liveness from descriptor
- list]]
- Of course, a variety of attacks remain. An adversary who controls a
- directory server can track certain clients by providing different
- information --- perhaps by listing only nodes under its control
- as working, or by informing only certain clients about a given
- node. Moreover, an adversary without control of a directory server can
- still exploit differences among client knowledge. If Eve knows that
- node $M$ is listed on server $D_1$ but not on $D_2$, she can use this
- knowledge to link traffic through $M$ to clients who have queried $D_1$.
- Thus these directory servers must be synchronized and redundant. The
- software is distributed with the signature public key of each directory
- server, and directories must be signed by a threshold of these keys.
- The directory servers in Tor are modeled after those in Mixminion
- \cite{minion-design}, but our situation is easier. Firstly, we make the
- simplifying assumption that all participants agree on who the directory
- servers are. Secondly, Mixminion needs to predict node behavior ---
- that is, build a reputation system for guessing future performance of
- nodes based on past performance, and then figure out a way to build
- a threshold consensus of these predictions. Tor just needs to get a
- threshold consensus of the current state of the network.
- The threshold consensus can be reached with standard Byzantine agreement
- techniques \cite{castro-liskov}.
- % Should I just stop the section here? Is the rest crap? -RD
- But this library, while more efficient than previous Byzantine agreement
- systems, is still complex and heavyweight for our purposes: we only need
- to compute a single algorithm, and we do not require strict in-order
- computation steps. Indeed, the complexity of Byzantine agreement protocols
- threatens our security, because users cannot easily understand it and
- thus have less trust in the directory servers. The Tor directory servers
- build a consensus directory
- through a simple four-round broadcast protocol. First, each server signs
- and broadcasts its current opinion to the other directory servers; each
- server then rebroadcasts all the signed opinions it has received. At this
- point all directory servers check to see if anybody's cheating. If so,
- directory service stops, the humans are notified, and that directory
- server is permanently removed from the network. Assuming no cheating,
- each directory server then computes a local algorithm on the set of
- opinions, resulting in a uniform shared directory. Then the servers sign
- this directory and broadcast it; and finally all servers rebroadcast
- the directory and all the signatures.
- The rebroadcast steps ensure that a directory server is heard by either
- all of the other servers or none of them (some of the links between
- directory servers may be down). Broadcasts are feasible because there
- are so few directory servers (currently 3, but we expect to use as many
- as 9 as the network scales). The actual local algorithm for computing
- the shared directory is straightforward, and is described in the Tor
- specification \cite{tor-spec}.
- % we should, uh, add this to the spec. oh, and write it. -RD
- Using directory servers rather than flooding approaches provides
- simplicity and flexibility. For example, they don't complicate
- the analysis when we start experimenting with non-clique network
- topologies. And because the directories are signed, they can be cached at
- all the other onion routers (or even elsewhere). Thus directory servers
- are not a performance bottleneck when we have many users, and also they
- won't aid traffic analysis by forcing clients to periodically announce
- their existence to any central point.
- % Mention Hydra as an example of non-clique topologies. -NM, from RD
- % also find some place to integrate that dirservers have to actually
- % lay test circuits and use them, otherwise routers could connect to
- % the dirservers but discard all other traffic.
- % in some sense they're like reputation servers in \cite{mix-acc} -RD
- \Section{Rendezvous points: location privacy}
- \label{sec:rendezvous}
- Rendezvous points are a building block for \emph{location-hidden services}
- (aka responder anonymity) in the Tor network. Location-hidden services
- means Bob can offer a TCP service, such as a webserver, without revealing
- the IP of that service. One motivation for location privacy is to provide
- protection against DDoS attacks: attackers are forced to attack the
- onion routing network as a whole rather than just Bob's IP.
- \subsection{Goals for rendezvous points}
- \label{subsec:rendezvous-goals}
- In addition to our other goals, have tried to provide the following
- properties in our design for location-hidden servers:
- \begin{tightlist}
- \item[Flood-proof:] An attacker should not be able to flood Bob with traffic
- simply by sending may requests to Bob's public location. Thus, Bob needs a
- way to filter incoming requests.
- \item[Robust:] Bob should be able to maintain a long-term pseudonymous
- identity even in the presence of OR failure. Thus, Bob's identity must not
- be tied to a single OR.
- \item[Smear-resistant:] An attacker should not be able to use rendezvous
- points to smear an OR. That is, if a social attacker tries to host a
- location-hidden service that is illegal or disreputable, it should not
- appear---even to a casual observer---that the OR is hosting that service.
- \item[Application-transparent:] Although we are willing to require users to
- run special software to access location-hidden servers, we are not willing
- to require them to modify their applications.
- \end{tightlist}
- \subsection{Rendezvous design}
- We provide location-hiding for Bob by allowing him to advertise several onion
- routers (his \emph{Introduction Points}) as his public location. (He may do
- this on any robust efficient distributed key-value lookup system with
- authenticated updates, such as CFS \cite{cfs:sosp01}.)
- Alice, the client, chooses a node for her \emph{Meeting
- Point}. She connects to one of Bob's introduction points, informs him
- about her rendezvous point, and then waits for him to connect to the
- rendezvous
- point. This extra level of indirection means Bob's introduction points
- don't open themselves up to abuse by serving files directly, eg if Bob
- chooses a node in France to serve material distateful to the French,
- %
- % We need a more legitimate-sounding reason here.
- %
- or if Bob's service tends to get DDoS'ed by script kiddies.
- The extra level of indirection also allows Bob to respond to some requests
- and ignore others.
- We provide the necessary glue so that Alice can view webpages from Bob's
- location-hidden webserver with minimal invasive changes. Both Alice and
- Bob must run local onion proxies.
- The steps of a rendezvous:
- \begin{tightlist}
- \item Bob chooses some Introduction Points, and advertises them on a
- Distributed Hash Table (DHT).
- \item Bob establishes onion routing connections to each of his
- Introduction Points, and waits.
- \item Alice learns about Bob's service out of band (perhaps Bob told her,
- or she found it on a website). She looks up the details of Bob's
- service from the DHT.
- \item Alice chooses and establishes a Rendezvous Point (RP) for this
- transaction.
- \item Alice goes to one of Bob's Introduction Points, and gives it a blob
- (encrypted for Bob) which tells him about herself, the RP
- she chose, and the first half of an ephemeral key handshake. The
- Introduction Point sends the blob to Bob.
- \item Bob chooses whether to ignore the blob, or to onion route to RP.
- Let's assume the latter.
- \item RP plugs together Alice and Bob. Note that RP can't recognize Alice,
- Bob, or the data they transmit (they share a session key).
- \item Alice sends a Begin cell along the circuit. It arrives at Bob's
- onion proxy. Bob's onion proxy connects to Bob's webserver.
- \item Data goes back and forth as usual.
- \end{tightlist}
- When establishing an introduction point, Bob provides the onion router
- with a public ``introduction'' key. The hash of this public key
- identifies a unique service, and (since Bob is required to sign his
- messages) prevents anybody else from usurping Bob's introduction point
- in the future. Bob uses the same public key when establishing the other
- introduction points for that service.
- The blob that Alice gives the introduction point includes a hash of Bob's
- public key to identify the service, an optional initial authentication
- token (the introduction point can do prescreening, eg to block replays),
- and (encrypted to Bob's public key) the location of the rendezvous point,
- a rendezvous cookie Bob should tell RP so he gets connected to
- Alice, an optional authentication token so Bob can choose whether to respond,
- and the first half of a DH key exchange. When Bob connects to RP
- and gets connected to Alice's pipe, his first cell contains the
- other half of the DH key exchange.
- The authentication tokens can be used to provide selective access to users
- proportional to how important it is that they main uninterrupted access
- to the service. During normal situations, Bob's service might simply be
- offered directly from mirrors; Bob also gives out authentication cookies
- to special users. When those mirrors are knocked down by DDoS attacks,
- those special users can switch to accessing Bob's service via the Tor
- rendezvous system.
- \SubSection{Integration with user applications}
- For each service Bob offers, he configures his local onion proxy to know
- the local IP and port of the server, a strategy for authorizating Alices,
- and a public key. (Each onion router could run a node in this lookup
- system; also note that as a stopgap measure, we can just run a simple
- lookup system on the directory servers.) Bob publishes into the DHT
- (indexed by the hash of the public key) the public key, an expiration
- time (``not valid after''), and the current introduction points for that
- service. Note that Bob's webserver is unmodified, and doesn't even know
- that it's hidden behind the Tor network.
- As far as Alice's experience goes, we require that her client interface
- remain a SOCKS proxy, and we require that she shouldn't have to modify
- her applications. Thus we encode all of the necessary information into
- the hostname (more correctly, fully qualified domain name) that Alice
- uses, eg when clicking on a url in her browser. Location-hidden services
- use the special top level domain called `.onion': thus hostnames take the
- form x.y.onion where x encodes the hash of PK, and y is the authentication
- cookie. Alice's onion proxy examines hostnames and recognizes when they're
- destined for a hidden server. If so, it decodes the PK and starts the
- rendezvous as described in the table above.
- \subsection{Previous rendezvous work}
- Ian Goldberg developed a similar notion of rendezvous points for
- low-latency anonymity systems \cite{ian-thesis}. His ``service tag''
- is the same concept as our ``hash of service's public key''. We make it
- a hash of the public key so it can be self-authenticating, and so the
- client can recognize the same service with confidence later on. His
- design differs from ours in the following ways though. Firstly, Ian
- suggests that the client should manually hunt down a current location of
- the service via Gnutella; whereas our use of the DHT makes lookup faster,
- more robust, and transparent to the user. Secondly, in Tor the client
- and server can share ephemeral DH keys, so at no point in the path is
- the plaintext
- exposed. Thirdly, our design is much more practical for deployment in a
- volunteer network, in terms of getting volunteers to offer introduction
- and rendezvous point services. The introduction points do not output any
- bytes to the clients, and the rendezvous points don't know the client,
- the server, or the stuff being transmitted. The indirection scheme
- is also designed with authentication/authorization in mind -- if the
- client doesn't include the right cookie with its request for service,
- the server doesn't even acknowledge its existence.
- \Section{Analysis}
- \label{sec:analysis}
- In this section, we discuss how well Tor meets our stated design goals
- and its resistance to attacks.
- Goals:
- \begin{description}
- \item [Basic Anonymity:] Because traffic is encrypted, changing in
- appearance, and can flow from anywhere to anywhere within the
- network, a simple observer that cannot see both the initiator
- activity and the corresponding activity where the responder talks to
- the network will not be able to link the initiator and responder.
- Nor is it possible to directly correlate any two communication
- sessions as coming from a single source without additional
- information. Resistance to specific anonymity threats will be discussed
- below.
- \item[Deployability:]
- \item[Usability:]
- \item[Flexibility:]
- \item[Conservative design:]
- \end{description}
- Basic
- How well do we resist chosen adversary?
- How well do we meet stated goals?
- Mention jurisdictional arbitrage.
- Pull attacks and defenses into analysis as a subsection
- \Section{Open Questions in Low-latency Anonymity}
- \label{sec:maintaining-anonymity}
- % There must be a better intro than this! -NM
- In addition to the open problems discussed in
- section~\ref{subsec:non-goals}, many other questions remain to be
- solved by future research before we can be truly confident that we
- have built a secure low-latency anonymity service.
- Many of these open issues are questions of balance. For example,
- how often should users rotate to fresh circuits? Too-frequent
- rotation is inefficient and expensive, but too-infrequent rotation
- makes the user's traffic linkable. Instead of opening a fresh
- circuit; clients can also limit linkability exit from a middle point
- of the circuit, or by truncating and re-extending the circuit, but
- more analysis is needed to determine the proper trade-off.
- [XXX mention predecessor attacks?]
- A similar question surrounds timing of directory operations:
- how often should directories be updated? With too-infrequent
- updates clients receive an inaccurate picture of the network; with
- too-frequent updates the directory servers are overloaded.
- %do different exit policies at different exit nodes trash anonymity sets,
- %or not mess with them much?
- %
- %% Why would they? By routing traffic to certain nodes preferentially?
- [XXX Choosing paths and path lengths: I'm not writing this bit till
- Arma's pathselection stuff is in. -NM]
- %%%% Roger said that he'd put a path selection paragraph into section
- %%%% 4 that would replace this.
- %
- %I probably should have noted that this means loops will be on at least
- %five hop routes, which should be rare given the distribution. I'm
- %realizing that this is reproducing some of the thought that led to a
- %default of five hops in the original onion routing design. There were
- %some different assumptions, which I won't spell out now. Note that
- %enclave level protections really change these assumptions. If most
- %circuits are just two hops, then just a single link observer will be
- %able to tell that two enclaves are communicating with high probability.
- %So, it would seem that enclaves should have a four node minimum circuit
- %to prevent trivial circuit insider identification of the whole circuit,
- %and three hop minimum for circuits from an enclave to some nonclave
- %responder. But then... we would have to make everyone obey these rules
- %or a node that through timing inferred it was on a four hop circuit
- %would know that it was probably carrying enclave to enclave traffic.
- %Which... if there were even a moderate number of bad nodes in the
- %network would make it advantageous to break the connection to conduct
- %a reformation intersection attack. Ahhh! I gotta stop thinking
- %about this and work on the paper some before the family wakes up.
- %On Sat, Oct 25, 2003 at 06:57:12AM -0400, Paul Syverson wrote:
- %> Which... if there were even a moderate number of bad nodes in the
- %> network would make it advantageous to break the connection to conduct
- %> a reformation intersection attack. Ahhh! I gotta stop thinking
- %> about this and work on the paper some before the family wakes up.
- %This is the sort of issue that should go in the 'maintaining anonymity
- %with tor' section towards the end. :)
- %Email from between roger and me to beginning of section above. Fix and move.
- Throughout this paper, we have assumed that end-to-end traffic
- analysis cannot yet be defeated. But even high-latency anonymity
- systems can be vulnerable to end-to-end traffic analysis, if the
- traffic volumes are high enough, and if users' habits are sufficiently
- distinct \cite{limits-open,statistical-disclosure}. \emph{What can be
- done to limit the effectiveness of these attacks against low-latency
- systems?} Tor already makes some effort to conceal the starts and
- ends of streams by wrapping all long-range control commands in
- identical-looking relay cells, but more analysis is needed. Link
- padding could frustrate passive observer who count packets; long-range
- padding could work against observers who own the first hop in a
- circuit. But more research needs to be done in order to find an
- efficient and practical approach. Volunteers prefer not to run
- constant-bandwidth padding; but more sophisticated traffic shaping
- approaches remain somewhat unanalyzed. [XXX is this so?] Recent work
- on long-range padding \cite{defensive-dropping} shows promise. One
- could also try to reduce correlation in packet timing by batching and
- re-ordering packets, but it is unclear whether this could improve
- anonymity without introducing so much latency as to render the
- network unusable.
- Even if passive timing attacks were wholly solved, active timing
- attacks would remain. \emph{What can
- be done to address attackers who can introduce timing patterns into
- a user's traffic?} [XXX mention likely approaches]
- %%% I think we cover this by framing the problem as ``Can we make
- %%% end-to-end characteristics of low-latency systems as good as
- %%% those of high-latency systems?'' Eliminating long-term
- %%% intersection is a hard problem.
- %
- %Even regardless of link padding from Alice to the cloud, there will be
- %times when Alice is simply not online. Link padding, at the edges or
- %inside the cloud, does not help for this.
- In order to scale to large numbers of users, and to prevent an
- attacker from observing the whole network at once, it may be necessary
- for low-latency anonymity systems to support far more servers than Tor
- currently anticipates. This introduces several issues. First, if
- approval by a centralized set of directory servers is no longer
- feasible, what mechanism should be used to prevent adversaries from
- signing up many spurious servers? (Tarzan and Morphmix present
- possible solutions.) Second, if clients can no longer have a complete
- picture of the network at all times how do we prevent attackers from
- manipulating client knowledge? Third, if there are to many servers
- for every server to constantly communicate with every other, what kind
- of non-clique topology should the network use? [XXX cite george's
- restricted-routes paper] (Whatever topology we choose, we need some
- way to keep attackers from manipulating their position within it.)
- Fourth, since no centralized authority is tracking server reliability,
- How do we prevent unreliable servers from rendering the network
- unusable? Fifth, do clients receive so much anonymity benefit from
- running their own servers that we should expect them all to do so, or
- do we need to find another incentive structure to motivate them?
- Alternatively, it may be the case that one of these problems proves
- intractable, or that the drawbacks to many-server systems prove
- greater than the benefits. Nevertheless, we may still do well to
- consider non-clique topologies. A cascade topology may provide more
- defense against traffic confirmation confirmation.
- % Why would it? Cite. -NM
- Does the hydra (many inputs, few outputs) topology work
- better? Are we going to get a hydra anyway because most nodes will be
- middleman nodes?
- %%% Do more with this paragraph once The TCP-over-TCP paragraph is
- %%% more integrated into Related works.
- %
- As mentioned in section\ref{where-is-it-now}, Tor could improve its
- robustness against node failure by buffering stream data at the
- network's edges, and performing end-to-end acknowledgments. The
- efficacy of this approach remains to be tested, however, and there
- may be more effective means for ensuring reliable connections in the
- presence of unreliable nodes.
- %%% Keeping this original paragraph for a little while, since it
- %%% is not the same as what's written there now.
- %
- %Because Tor depends on TLS and TCP to provide a reliable transport,
- %when one of the servers goes down, all the circuits (and thus streams)
- %traveling over that server must break. This reduces anonymity because
- %everybody needs to reconnect right then (does it? how much?) and
- %because exit connections all break at the same time, and it also harms
- %usability. It seems the problem is even worse in a peer-to-peer
- %environment, because so far such systems don't really provide an
- %incentive for nodes to stay connected when they're done browsing, so
- %we would expect a much higher churn rate than for onion routing.
- %there ways of allowing streams to survive the loss of a node in the
- %path?
- % Roger or Paul suggested that we say something about incentives,
- % too, but I think that's a better candidate for our future work
- % section. After all, we will doubtlessly learn very much about why
- % people do or don't run and use Tor in the near future. -NM
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Attacks and Defenses}
- \label{sec:attacks}
- Below we summarize a variety of attacks and how well our design withstands
- them.
- \subsubsection*{Passive attacks}
- \begin{tightlist}
- \item \emph{Observing user behavior.}
- \item \emph{End-to-end Timing correlation.}
- \item \emph{End-to-end Size correlation.}
- \item \emph{Website fingerprinting attacks} old onion routing is
- vulnerable to website fingerprinting attacks like david martin's
- from usenix sec and drew's from pet2002. so is tor. we need to send
- some padding or something, including long-range padding (to foil the
- first hop), to solve this. let's hope somebody writes a followup to
- \cite{defensive-dropping} that tells us what, exactly, to do, and why,
- exactly, it helps. but website fingerprinting intersection attacks
- \cite{kesdogan:pet2002} still seem an open problem.
- \item \emph{Option distinguishability.} User configuration options.
- A: We standardize on how clients behave. cite econymics.
- \item sub of the above on exit policy\\
- Partitioning based on exit policy.
- Run a rare exit server/something other people won't allow.
- DOS three of the 4 who would allow a certain exit.
- \item Content analysis. Not our main thing, but, Privoxy to
- anonymization of data stream.
- \end{tightlist}
- \subsubsection*{Active attacks}
- \begin{tightlist}
- \item \emph{Key compromise.} Talk about all three keys. 3 bullets
- \item \emph{Iterated subpoena.} Legal roving adversary. Works bad against
- this because of ephemeral keys. Criticize pets paper in section 2 for
- failing to consider this when describing roving adversary.
- \item \emph{Run recipient.} Be the Web server.
- \item \emph{Run a hostile node.}
- \item \emph{Compromise entire path.} Directory servers controlling admission
- to network. But if you do compromise it, we're toast.
- \item \emph{Selectively DoS OR.} Flood the pipe. We're toast. Rate limiting.
- We can't stop flooding creates through all your neighbors. Router twins
- is a useful fallback, makes you hit all the twins.
- \item \emph{Introduce timing into messages.}
- \item \emph{Tagging attacks.}
- Integrity checking stops this.
- Subcase of running a hostile node:
- the exit node can change the content you're getting to try to
- trick you. similarly, when it rejects you due to exit policy,
- it could give you a bad IP that sends you somewhere else.
- \item \emph{replaying traffic} Can't in Tor. NonSSL anonymizer.
- \item Do bad things with the Tor network, so we are hated and
- get shut down. Now the user you want to watch has to use anonymizer.
- Exit policy's are a start.
- \item Send spam through the network. Exit policy (no open relay) and
- rate limiting. We won't send to more than 8 people at a time. See
- section 5.1.
- we rely on DNS being globally consistent. if people in africa resolve
- IPs differently, then asking to extend a circuit to a certain IP can
- give away your origin.
- \end{tightlist}
- \subsubsection*{Directory attacks}
- \begin{tightlist}
- \item knock out a dirserver
- \item knock out half the dirservers
- \item trick user into using different software (with different dirserver
- keys)
- \item OR connects to the dirservers but nowhere else
- \item foo
- \end{tightlist}
- \subsubsection*{Attacks against rendezvous points}
- \begin{tightlist}
- \item foo
- \end{tightlist}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Future Directions and Open Problems}
- \label{sec:conclusion}
- % Mention that we need to do TCP over tor for reliability.
- Tor brings together many innovations into
- a unified deployable system. But there are still several attacks that
- work quite well, as well as a number of sustainability and run-time
- issues remaining to be ironed out. In particular:
- \begin{itemize}
- \item \emph{Scalability:} Since Tor's emphasis currently is on simplicity
- of design and deployment, the current design won't easily handle more
- than a few hundred servers, because of its clique topology. Restricted
- route topologies \cite{danezis-pets03} promise comparable anonymity
- with much better scaling properties, but we must solve problems like
- how to randomly form the network without introducing net attacks.
- % [cascades are a restricted route topology too. we must mention
- % earlier why we're not satisfied with the cascade approach.]-RD
- % [We do. At least
- \item \emph{Cover traffic:} Currently we avoid cover traffic because
- it introduces clear performance and bandwidth costs, but and its
- security properties are not well understood. With more research
- \cite{SS03,defensive-dropping}, the price/value ratio may change, both for
- link-level cover traffic and also long-range cover traffic. In particular,
- we expect restricted route topologies to reduce the cost of cover traffic
- because there are fewer links to cover.
- \item \emph{Better directory distribution:} Even with the threshold
- directory agreement algorithm described in \ref{subsec:dirservers},
- the directory servers are still trust bottlenecks. We must find more
- decentralized yet practical ways to distribute up-to-date snapshots of
- network status without introducing new attacks.
- \item \emph{Implementing location-hidden servers:} While
- Section~\ref{sec:rendezvous} provides a design for rendezvous points and
- location-hidden servers, this feature has not yet been implemented.
- We will likely encounter additional issues, both in terms of usability
- and anonymity, that must be resolved.
- \item \emph{Wider-scale deployment:} The original goal of Tor was to
- gain experience in deploying an anonymizing overlay network, and learn
- from having actual users. We are now at the point where we can start
- deploying a wider network. We will see what happens!
- % ok, so that's hokey. fix it. -RD
- \item \emph{Further specification review:} Foo.
- \end{itemize}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% commented out for anonymous submission
- %\Section{Acknowledgments}
- % Peter Palfrader for editing
- % Bram Cohen for congestion control discussions
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \bibliographystyle{latex8}
- \bibliography{tor-design}
- \end{document}
- % Style guide:
- % U.S. spelling
- % avoid contractions (it's, can't, etc.)
- % prefer ``for example'' or ``such as'' to e.g.
- % prefer ``that is'' to i.e.
- % 'mix', 'mixes' (as noun)
- % 'mix-net'
- % 'mix', 'mixing' (as verb)
- % 'middleman' [Not with a hyphen; the hyphen has been optional
- % since Middle English.]
- % 'nymserver'
- % 'Cypherpunk', 'Cypherpunks', 'Cypherpunk remailer'
- % 'Onion Routing design', 'onion router' [note capitalization]
- % 'SOCKS'
- % Try not to use \cite as a noun.
- %
- % 'Substitute ``Damn'' every time you're inclined to write ``very;'' your
- % editor will delete it and the writing will be just as it should be.'
- % -- Mark Twain
|