1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993 |
- \documentclass[times,10pt,twocolumn]{article}
- \usepackage{latex8}
- \usepackage{times}
- \usepackage{url}
- \usepackage{graphics}
- \usepackage{amsmath}
- \pagestyle{empty}
- \renewcommand\url{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- \newcommand\emailaddr{\begingroup \def\UrlLeft{<}\def\UrlRight{>}\urlstyle{tt}\Url}
- % If an URL ends up with '%'s in it, that's because the line *in the .bib/.tex
- % file* is too long, so break it there (it doesn't matter if the next line is
- % indented with spaces). -DH
- %\newif\ifpdf
- %\ifx\pdfoutput\undefined
- % \pdffalse
- %\else
- % \pdfoutput=1
- % \pdftrue
- %\fi
- \newenvironment{tightlist}{\begin{list}{$\bullet$}{
- \setlength{\itemsep}{0mm}
- \setlength{\parsep}{0mm}
- % \setlength{\labelsep}{0mm}
- % \setlength{\labelwidth}{0mm}
- % \setlength{\topsep}{0mm}
- }}{\end{list}}
- \begin{document}
- %% Use dvipdfm instead. --DH
- %\ifpdf
- % \pdfcompresslevel=9
- % \pdfpagewidth=\the\paperwidth
- % \pdfpageheight=\the\paperheight
- %\fi
- \title{Tor: The Second-Generation Onion Router}
- % Putting the 'Private' back in 'Virtual Private Network'
- %\author{Roger Dingledine \\ The Free Haven Project \\ arma@freehaven.net \and
- %Nick Mathewson \\ The Free Haven Project \\ nickm@freehaven.net \and
- %Paul Syverson \\ Naval Research Lab \\ syverson@itd.nrl.navy.mil}
- \maketitle
- \thispagestyle{empty}
- \begin{abstract}
- We present Tor, a circuit-based low-latency anonymous communication
- system. This second-generation Onion Routing system addresses limitations
- in the original design. We add perfect forward secrecy, congestion
- control, directory servers, integrity checking, variable exit policies,
- and a practical design for rendezvous points. Tor works on the real-world
- Internet, requires no special privileges or kernel modifications, requires
- little synchronization or coordination between nodes, and provides a
- reasonable trade-off between anonymity, usability, and efficiency. We
- close with a list of open problems in anonymous communication systems.
- \end{abstract}
- %\begin{center}
- %\textbf{Keywords:} anonymity, peer-to-peer, remailer, nymserver, reply block
- %\end{center}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Overview}
- \label{sec:intro}
- Onion Routing is a distributed overlay network designed to anonymize
- low-latency TCP-based applications such as web browsing, secure shell,
- and instant messaging. Clients choose a path through the network and
- build a \emph{virtual circuit}, in which each node (or ``onion router'')
- in the path knows its predecessor and successor, but no other nodes in
- the circuit. Traffic flowing down the circuit is sent in fixed-size
- \emph{cells}, which are unwrapped by a symmetric key at each node
- (like the layers of an onion) and relayed downstream. The original
- Onion Routing project published several design and analysis papers
- \cite{or-ih96,or-jsac98,or-discex00,or-pet00}. While a wide area Onion
- Routing network was deployed for some weeks, the only long-running and
- publicly accessible implementation of the original design was a fragile
- proof-of-concept that ran on a single machine. Even this simple deployment
- processed connections from over sixty thousand distinct IP addresses from
- all over the world at an average rate of about fifty thousand per day.
- But many critical design and deployment issues were never
- resolved, and the design has not been updated in several years. Here
- we describe Tor, a protocol for asynchronous, loosely federated onion
- routers that provides the following improvements over the old Onion
- Routing design:
- \begin{tightlist}
- \item \textbf{Perfect forward secrecy:} The original Onion Routing
- design was vulnerable to a single hostile node recording traffic and
- later compromising successive nodes in the circuit and forcing them
- to decrypt it. Rather than using a single onion to lay each circuit,
- Tor now uses an incremental or \emph{telescoping} path-building design,
- where the initiator negotiates session keys with each successive hop in
- the circuit. Once these keys are deleted, subsequently compromised nodes
- cannot decrypt old traffic. As a side benefit, onion replay detection
- is no longer necessary, and the process of building circuits is more
- reliable, since the initiator knows when a hop fails and can then try
- extending to a new node.
- \item \textbf{Separation of protocol cleaning from anonymity:}
- The original Onion Routing design required a separate ``application
- proxy'' for each supported application protocol---most of which were
- never written, so many applications were never supported. Tor uses the
- standard and near-ubiquitous SOCKS \cite{socks4} proxy interface, allowing
- us to support most TCP-based programs without modification. This design
- change allows Tor to use the filtering features of privacy-enhancing
- application-level proxies such as Privoxy \cite{privoxy} without having
- to incorporate those features itself.
- \item \textbf{Many TCP streams can share one circuit:} The
- original Onion Routing design built a separate circuit for each
- application-level request. This hurt performance by requiring
- multiple public key operations for every request, and also presented
- a threat to anonymity from building so many different circuits; see
- Section~\ref{sec:maintaining-anonymity}. Tor multiplexes multiple TCP
- streams along each virtual circuit, to improve efficiency and anonymity.
- \item \textbf{Leaky-pipe circuit topology:} Through in-band signaling
- within the circuit, Tor initiators can direct traffic to nodes partway
- down the circuit. This novel approach allows both for long-range
- padding to frustrate traffic shape and volume attacks at the initiator
- \cite{defensive-dropping}, and, because circuits are used by more than one
- application, allows traffic to exit the circuit from the middle---thus
- frustrating traffic shape and volume attacks based on observing the end
- of the circuit.
- \item \textbf{No mixing, padding, or traffic shaping:} The original Onion
- Routing design called for batching and reordering the cells arriving from
- each circuit. It also included padding between onion routers and, in a
- later design, between onion proxies (that is, users) and onion routers
- \cite{or-ih96,or-jsac98}. The trade-off between padding protection
- and cost was discussed, but no general padding scheme was suggested. In
- \cite{or-pet00} it was theorized \emph{traffic shaping} would generally
- be used, but details were not provided. Recent research \cite{econymics}
- and deployment experience \cite{freedom21-security} suggest that this
- level of resource use is not practical or economical; and even full link
- padding is still vulnerable \cite{defensive-dropping}. Thus, until we
- have a proven and convenient design for traffic shaping or low-latency
- mixing that will improve anonymity against a realistic adversary, we
- leave these strategies out.
- \item \textbf{Congestion control:} Earlier anonymity designs do not
- address traffic bottlenecks. Unfortunately, typical approaches to
- load balancing and flow control in overlay networks involve inter-node
- control communication and global views of traffic. Tor's decentralized
- congestion control uses end-to-end acks to maintain reasonable anonymity
- while allowing nodes at the edges of the network to detect congestion
- or flooding attacks and send less data until the congestion subsides.
- \item \textbf{Directory servers:} The original Onion Routing design
- planned to flood link-state information through the network---an approach
- that can be unreliable and open to partitioning attacks or outright
- deception. Tor takes a simplified view toward distributing link-state
- information. Certain more trusted onion routers also act as directory
- servers: they provide signed \emph{directories} that describe known
- routers and their availability. Users periodically download these
- directories via HTTP.
- \item \textbf{Variable exit policies:} Tor provides a consistent mechanism
- for each node to specify and advertise a policy describing the hosts
- and ports to which it will connect. These exit policies are critical
- in a volunteer-based distributed infrastructure, because each operator
- is comfortable with allowing different types of traffic to exit the Tor
- network from his node.
- \item \textbf{End-to-end integrity checking:} The original Onion Routing
- design did no integrity checking on data. Any onion router on the
- circuit could change the contents of data cells as they passed by---for
- example, to alter a connection request on the fly so it would connect
- to a different webserver, or to `tag' encrypted traffic and look for
- corresponding corrupted traffic at the network edges \cite{minion-design}.
- Tor hampers these attacks by checking data integrity before it leaves
- the network.
- \item \textbf{Improved robustness to failed nodes:} A failed node
- in the old design meant that circuit-building failed, but thanks to
- Tor's step-by-step circuit building, users can notice failed nodes
- while building circuits and route around them. Additionally, liveness
- information from directories allows users to avoid unreliable nodes in
- the first place.
- \item \textbf{Rendezvous points and location-protected servers:}
- Tor provides an integrated mechanism for responder anonymity via
- location-protected servers. Previous Onion Routing designs included
- long-lived ``reply onions'' that could be used to build virtual circuits
- to a hidden server, but these reply onions did not provide forward
- security, and would become useless if any node in the path went down
- or rotated its keys. In Tor, clients negotiate {\it rendezvous points}
- to connect with hidden servers; reply onions are no longer required.
- \end{tightlist}
- Unlike anonymity systems like Freedom \cite{freedom2-arch}, Tor only
- attempts to anonymize TCP streams. Because it does not require patches
- (or built-in support) in an operating system's network stack, this
- approach has proven valuable to Tor's portability and deployability.
- We have implemented most of the above features. Our source code is
- available under a free license, and, as far as we know, is unencumbered by
- patents. We have recently begun deploying a widespread alpha network
- to test the design in practice, to get more experience with usability
- and users, and to provide a research platform for experimenting with
- new ideas.
- We review previous work in Section~\ref{sec:related-work}, describe
- our goals and assumptions in Section~\ref{sec:assumptions},
- and then address the above list of improvements in
- Sections~\ref{sec:design}-\ref{sec:rendezvous}. We summarize
- in Section~\ref{sec:attacks} how our design stands up to
- known attacks, and conclude with a list of open problems in
- Section~\ref{sec:maintaining-anonymity} and future work for the Onion
- Routing project in Section~\ref{sec:conclusion}.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Related work}
- \label{sec:related-work}
- Modern anonymity systems date to Chaum's Mix-Net design
- \cite{chaum-mix}. Chaum
- proposed hiding the correspondence between sender and recipient by
- wrapping messages in layers of public key cryptography, and relaying them
- through a path composed of ``Mixes.'' These mixes in turn decrypt, delay,
- and re-order messages, before relaying them along the sender-selected
- path towards their destinations.
- Subsequent relay-based anonymity designs have diverged in two
- principal directions. Some have attempted to maximize anonymity at
- the cost of introducing comparatively large and variable latencies,
- including Babel \cite{babel}, Mixmaster \cite{mixmaster-spec}, and
- Mixminion \cite{minion-design}. Because of this
- decision, these \emph{high-latency} networks are well-suited for anonymous
- email, but introduce too much lag for interactive tasks such as web browsing,
- internet chat, or SSH connections.
- Tor belongs to the second category: \emph{low-latency} designs that
- attempt to anonymize interactive network traffic. These systems handle
- a variety of bidirectional protocols. They also provide more convenient
- mail delivery than the high-latency fire-and-forget anonymous email
- networks, because the remote mail server provides explicit delivery
- confirmation. But because these designs typically
- involve many packets that must be delivered quickly, it is
- difficult for them to prevent an attacker who can eavesdrop both ends of the
- communication from correlating the timing and volume
- of traffic entering the anonymity network with traffic leaving it. These
- protocols are also vulnerable against active attacks in which an
- adversary introduces timing patterns into traffic entering the network, and
- looks
- for correlated patterns among exiting traffic.
- Although some work has been done to frustrate
- these attacks,\footnote{
- The most common approach is to pad and limit communication to a constant
- rate, or to limit
- the variation in traffic shape. Doing so can have prohibitive bandwidth
- costs and/or performance limitations.
- } most designs protect primarily against traffic analysis rather than traffic
- confirmation \cite{or-jsac98}---that is, they assume that the attacker is
- attempting to learn who is talking to whom, not to confirm a prior suspicion
- about who is talking to whom.
- The simplest low-latency designs are single-hop proxies such as the
- Anonymizer \cite{anonymizer}, wherein a single trusted server strips the
- data's origin before relaying it. These designs are easy to
- analyze, but require end-users to trust the anonymizing proxy.
- Concentrating the traffic to a single point increases the anonymity set
- (the set of people a given user is hiding among), but it can make traffic
- analysis easier: an adversary need only eavesdrop on the proxy to observe
- the entire system.
- More complex are distributed-trust, circuit-based anonymizing systems.
- In these designs, a user establishes one or more medium-term bidirectional
- end-to-end circuits, and tunnels TCP streams in fixed-size cells.
- Establishing circuits is computationally expensive and typically
- requires public-key
- cryptography, whereas relaying cells is comparatively inexpensive and
- typically requires only symmetric encryption.
- Because a circuit crosses several servers, and each server only knows
- the adjacent servers in the circuit, no single server can link a
- user to her communication partners.
- The Java Anon Proxy (also known as JAP or Web MIXes) uses fixed shared
- routes known as \emph{cascades}. As with a single-hop proxy, this
- approach aggregates users into larger anonymity sets, but again an
- attacker only needs to observe both ends of the cascade to bridge all
- the system's traffic. The Java Anon Proxy's design provides
- protection by padding between end users and the head of the cascade
- \cite{web-mix}. However, it is not demonstrated whether the current
- implementation's padding policy improves anonymity.
- PipeNet \cite{back01, pipenet}, another low-latency design proposed at
- about the same time as the original Onion Routing design, provided
- stronger anonymity at the cost of allowing a single user to shut
- down the network simply by not sending. Low-latency anonymous
- communication has also been designed for other environments such as
- ISDN \cite{isdn-mixes}.
- In P2P designs like Tarzan \cite{tarzan:ccs02} and MorphMix
- \cite{morphmix:fc04}, all participants both generate traffic and relay
- traffic for others. These systems aim to prevent a peer
- or observer from knowing whether a given peer originated a request
- or just relayed it from another peer. While Tarzan and MorphMix use
- layered encryption as above, Crowds \cite{crowds-tissec} simply assumes
- an adversary who cannot observe the initiator: it uses no public-key
- encryption, so nodes on a circuit can read that circuit's traffic.
- Hordes \cite{hordes-jcs} is based on Crowds but also uses multicast
- responses to hide the initiator. Herbivore \cite{herbivore} and P5
- \cite{p5} go even further, requiring broadcast. They make anonymity
- and efficiency trade-offs to make broadcast more practical.
- These systems are designed primarily for communication between peers,
- although Herbivore users can make external connections by
- requesting a peer to serve as a proxy.
- Systems like Freedom and the original Onion Routing build the circuit
- all at once, using a layered ``onion'' of public-key encrypted messages,
- each layer of which provides a set of session keys and the address of the
- next server in the circuit. Tor as described herein, Tarzan, MorphMix,
- Cebolla \cite{cebolla}, and Rennhard's Anonymity Network \cite{anonnet}
- build the circuit
- in stages, extending it one hop at a time.
- Section~\ref{subsubsec:constructing-a-circuit} describes how this
- approach makes perfect forward secrecy feasible.
- Circuit-based anonymity designs must choose which protocol layer
- to anonymize. They may choose to intercept IP packets directly, and
- relay them whole (stripping the source address) along the circuit
- \cite{freedom2-arch,tarzan:ccs02}. Alternatively, like
- Tor, they may accept TCP streams and relay the data in those streams
- along the circuit, ignoring the breakdown of that data into TCP segments
- \cite{morphmix:fc04,anonnet}. Finally, they may accept application-level
- protocols (such as HTTP) and relay the application requests themselves
- along the circuit.
- Making this protocol-layer decision requires a compromise between flexibility
- and anonymity. For example, a system that understands HTTP, such as Crowds,
- can strip
- identifying information from those requests, can take advantage of caching
- to limit the number of requests that leave the network, and can batch
- or encode those requests in order to minimize the number of connections.
- On the other hand, an IP-level anonymizer can handle nearly any protocol,
- even ones unforeseen by their designers (though these systems require
- kernel-level modifications to some operating systems, and so are more
- complex and less portable). TCP-level anonymity networks like Tor present
- a middle approach: they are fairly application neutral (so long as the
- application supports, or can be tunneled across, TCP), but by treating
- application connections as data streams rather than raw TCP packets,
- they avoid the well-known inefficiencies of tunneling TCP over TCP
- \cite{tcp-over-tcp-is-bad}.
- Distributed-trust anonymizing systems need to prevent attackers from
- adding too many servers and thus compromising too many user paths.
- Tor relies on a small set of well-known directory servers, run by
- independent parties, to make decisions about which nodes can
- join. Tarzan and MorphMix allow unknown users to run servers, and use
- a limited resource (like IP addresses) to prevent an attacker from
- controlling too much of the network. Crowds suggests requiring
- written, notarized requests from potential crowd members.
- Anonymous communication is essential for censorship-resistant
- systems like Eternity \cite{eternity}, Free~Haven \cite{freehaven-berk},
- Publius \cite{publius}, and Tangler \cite{tangler}. Tor's rendezvous
- points enable connections between mutually anonymous entities; they
- are a building block for location-hidden servers, which are needed by
- Eternity and Free~Haven.
- % didn't include rewebbers. No clear place to put them, so I'll leave
- % them out for now. -RD
- \Section{Design goals and assumptions}
- \label{sec:assumptions}
- \SubSection{Goals}
- Like other low-latency anonymity designs, Tor seeks to frustrate
- attackers from linking communication partners, or from linking
- multiple communications to or from a single user. Within this
- main goal, however, several design considerations have directed
- Tor's evolution.
- \textbf{Deployability:} The design must be one that can be implemented,
- deployed, and used in the real world. This requirement precludes designs
- that are expensive to run (for example, by requiring more bandwidth
- than volunteers are willing to provide); designs that place a heavy
- liability burden on operators (for example, by allowing attackers to
- implicate onion routers in illegal activities); and designs that are
- difficult or expensive to implement (for example, by requiring kernel
- patches, or separate proxies for every protocol). This requirement also
- precludes systems in which non-anonymous parties (such as websites)
- must run our software. (We do not meet this goal for the current
- rendezvous design,
- however; see Section~\ref{sec:rendezvous}.)
- \textbf{Usability:} A hard-to-use system has fewer users---and because
- anonymity systems hide users among users, a system with fewer users
- provides less anonymity. Usability is thus not only a convenience for Tor:
- it is a security requirement \cite{econymics,back01}. Tor should
- therefore not
- require modifying applications; should not introduce prohibitive delays;
- and should require the user to make as few configuration decisions
- as possible. Finally, Tor should be easily implemented on all commonly used
- platforms; we cannot require users to change their operating system in order
- to be anonymous. (The current Tor implementation runs on Windows and
- assorted Unix-clones including Linux, FreeBSD, and MacOS X.)
- \textbf{Flexibility:} The protocol must be flexible and well-specified,
- so that it can serve as a test-bed for future research in low-latency
- anonymity systems. Many of the open problems in low-latency anonymity
- networks, such as generating dummy traffic or preventing Sybil attacks
- \cite{sybil}, may be solvable independently from the issues solved by
- Tor. Hopefully future systems will not need to reinvent Tor's design.
- (But note that while a flexible design benefits researchers,
- there is a danger that differing choices of extensions will make users
- distinguishable. Experiments should be run on a separate network.)
- \textbf{Simple design:} The protocol's design and security
- parameters must be well-understood. Additional features impose implementation
- and complexity costs; adding unproven techniques to the design threatens
- deployability, readability, and ease of security analysis. Tor aims to
- deploy a simple and stable system that integrates the best well-understood
- approaches to protecting anonymity.
- \SubSection{Non-goals}
- \label{subsec:non-goals}
- In favoring simple, deployable designs, we have explicitly deferred
- several possible goals, either because they are solved elsewhere, or because
- their solution is an open research problem.
- \textbf{Not Peer-to-peer:} Tarzan and MorphMix aim to scale to completely
- decentralized peer-to-peer environments with thousands of short-lived
- servers, many of which may be controlled by an adversary. This approach
- is appealing, but still has many open problems
- \cite{tarzan:ccs02,morphmix:fc04}.
- \textbf{Not secure against end-to-end attacks:} Tor does not claim
- to provide a definitive solution to end-to-end timing or intersection
- attacks. Some approaches, such as running an onion router, may help;
- see Section~\ref{sec:attacks} for more discussion.
- \textbf{No protocol normalization:} Tor does not provide \emph{protocol
- normalization} like Privoxy or the Anonymizer. If anonymization from
- the responder is desired for complex and variable
- protocols such as HTTP, Tor must be layered with a filtering proxy such
- as Privoxy to hide differences between clients, and expunge protocol
- features that leak identity.
- Note that by this separation Tor can also provide connections that
- are anonymous to the network yet authenticated to the responder, for
- example SSH.
- Similarly, Tor does not currently integrate
- tunneling for non-stream-based protocols like UDP; this too must be
- provided by an external service.
- % Actually, tunneling udp over tcp is probably horrible for some apps.
- % Should this get its own non-goal bulletpoint? The motivation for
- % non-goal-ness would be burden on clients / portability. -RD
- % No, leave it as is. -RD
- \textbf{Not steganographic:} Tor does not try to conceal which users are
- sending or receiving communications; it only tries to conceal with whom
- they communicate.
- \SubSection{Threat Model}
- \label{subsec:threat-model}
- A global passive adversary is the most commonly assumed threat when
- analyzing theoretical anonymity designs. But like all practical
- low-latency systems, Tor does not protect against such a strong
- adversary. Instead, we assume an adversary who can observe some fraction
- of network traffic; who can generate, modify, delete, or delay traffic
- on the network; who can operate onion routers of its own; and who can
- compromise some fraction of the onion routers on the network.
- In low-latency anonymity systems that use layered encryption, the
- adversary's typical goal is to observe both the initiator and the
- responder. Passive attackers can confirm a suspicion that Alice is
- talking to Bob if the timing and volume patterns of the traffic on the
- connection are distinct enough; active attackers can induce timing
- signatures on the traffic to \emph{force} distinct patterns. Tor provides
- some defenses against these \emph{traffic confirmation} attacks, for
- example by encouraging users to run their own onion routers, but it does
- not provide complete protection. Rather, we aim to prevent \emph{traffic
- analysis} attacks, where the adversary uses traffic patterns to learn
- which points in the network he should attack.
- Our adversary might try to link an initiator Alice with any of her
- communication partners, or he might try to build a profile of Alice's
- behavior. He might mount passive attacks by observing the edges of the
- network and correlating traffic entering and leaving the network---either
- by relationships in packet timing; relationships in the volume
- of data sent; or relationships in any externally visible user-selected
- options. The adversary can also mount active attacks by compromising
- routers or keys; by replaying traffic; by selectively denying service
- to trustworthy routers to encourage users to send their traffic through
- compromised routers, or denying service to users to see if the traffic
- elsewhere in the
- network stops; or by introducing patterns into traffic that can later be
- detected. The adversary might attack the directory servers to give users
- differing views of network state. Additionally, he can try to decrease
- the network's reliability by attacking nodes or by performing antisocial
- activities from reliable servers and trying to get them taken down;
- making the network unreliable flushes users to other less anonymous
- systems, where they may be easier to attack.
- We consider each of these attacks in more detail below, and summarize
- in Section~\ref{sec:attacks} how well the Tor design defends against
- each of them.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{The Tor Design}
- \label{sec:design}
- The Tor network is an overlay network; each node is called an onion router
- (OR). Onion routers run as normal user-level processes without needing
- any special
- privileges. Currently, each OR maintains a long-term TLS \cite{TLS}
- connection to every other
- OR. (We further discuss this clique-topology assumption in
- Section~\ref{sec:maintaining-anonymity}.) A subset of the ORs also act as
- directory servers, tracking which routers are in the network;
- see Section~\ref{subsec:dirservers} for directory server details.
- Each user
- runs local software called an onion proxy (OP) to fetch directories,
- establish paths (called \emph{virtual circuits}) across the network,
- and handle connections from user applications. These onion proxies accept
- TCP streams and multiplex them across the virtual circuit. The onion
- router on the other side
- % I don't mean other side, I mean wherever it is on the circuit. But
- % don't want to introduce complexity this early? Hm. -RD
- of the circuit connects to the destinations of
- the TCP streams and relays data.
- Each onion router uses three public keys: a long-term identity key, a
- short-term onion key, and a short-term link key. The identity
- (signing) key is used to sign TLS certificates, to sign its router
- descriptor (a summary of its keys, address, bandwidth, exit policy,
- etc), and to sign directories if it is a directory server. Changing
- the identity key of a router is considered equivalent to creating a
- new router. The onion (decryption) key is used for decrypting requests
- from users to set up a circuit and negotiate ephemeral keys. Finally,
- link keys are used by the TLS protocol when communicating between
- onion routers. Each short-term key is rotated periodically and
- independently, to limit the impact of key compromise.
- Section~\ref{subsec:cells} discusses the structure of the fixed-size
- \emph{cells} that are the unit of communication in Tor. We describe
- in Section~\ref{subsec:circuits} how virtual circuits are
- built, extended, truncated, and destroyed. Section~\ref{subsec:tcp}
- describes how TCP streams are routed through the network, and finally
- Section~\ref{subsec:congestion} talks about congestion control and
- fairness issues.
- \SubSection{Cells}
- \label{subsec:cells}
- Onion routers communicate with one another, and with users' OPs, via TLS
- connections with ephemeral keys. This prevents an attacker from
- impersonating an OR, conceals the contents of the connection with
- perfect forward secrecy, and prevents an attacker from modifying data
- on the wire.
- Traffic passes along these connections in fixed-size cells. Each cell
- is 256 bytes (but see Section~\ref{sec:conclusion} for a discussion of
- allowing large cells and small cells on the same network), and
- consists of a header and a payload. The header includes a circuit
- identifier (circID) that specifies which circuit the cell refers to
- (many circuits can be multiplexed over the single TLS connection), and
- a command to describe what to do with the cell's payload. (Circuit
- identifiers are connection-specific: each single circuit has a different
- circID on each OP/OR or OR/OR connection it traverses.)
- Based on their command, cells are either \emph{control} cells, which are
- always interpreted by the node that receives them, or \emph{relay} cells,
- which carry end-to-end stream data. The control cell commands are:
- \emph{padding} (currently used for keepalive, but also usable for link
- padding); \emph{create} or \emph{created} (used to set up a new circuit);
- and \emph{destroy} (to tear down a circuit).
- Relay cells have an additional header (the relay header) after the
- cell header, containing the stream identifier (many streams can
- be multiplexed over a circuit); an end-to-end checksum for integrity
- checking; the length of the relay payload; and a relay command.
- The entire contents of the relay header and the relay cell payload
- are encrypted or decrypted together as the relay cell moves along the
- circuit, using the 128-bit AES cipher in counter mode to generate a
- cipher stream.
- The
- relay commands are: \emph{relay
- data} (for data flowing down the stream), \emph{relay begin} (to open a
- stream), \emph{relay end} (to close a stream cleanly), \emph{relay
- teardown} (to close a broken stream), \emph{relay connected}
- (to notify the OP that a relay begin has succeeded), \emph{relay
- extend} and \emph{relay extended} (to extend the circuit by a hop,
- and to acknowledge), \emph{relay truncate} and \emph{relay truncated}
- (to tear down only part of the circuit, and to acknowledge), \emph{relay
- sendme} (used for congestion control), and \emph{relay drop} (used to
- implement long-range dummies).
- We describe each of these cell types and commands in more detail below.
- \SubSection{Circuits and streams}
- \label{subsec:circuits}
- The original Onion Routing design built one circuit for each
- TCP stream. Because building a circuit can take several tenths of a
- second (due to public-key cryptography delays and network latency),
- this design imposed high costs on applications like web browsing that
- open many TCP streams.
- In Tor, each circuit can be shared by many TCP streams. To avoid
- delays, users construct circuits preemptively. To limit linkability
- among their streams, users' OPs build a new circuit
- periodically if the previous one has been used,
- and expire old used circuits that no longer have any open streams.
- OPs consider making a new circuit once a minute: thus
- even heavy users spend a negligible amount of time and CPU in
- building circuits, but only a limited number of requests can be linked
- to each other through a given exit node. Also, because circuits are built
- in the background, OPs can recover from failed circuit creation
- without delaying streams and thereby harming user experience.
- \subsubsection{Constructing a circuit}
- \label{subsubsec:constructing-a-circuit}
- A user's OP constructs a circuit incrementally, negotiating a
- symmetric key with each OR on the circuit, one hop at a time. To begin
- creating a new circuit, the OP (call her Alice) sends a
- \emph{create} cell to the first node in her chosen path (call him Bob).
- (She chooses a new
- circID $C_{AB}$ not currently used on the connection from her to Bob.)
- This cell's
- payload contains the first half of the Diffie-Hellman handshake
- ($g^x$), encrypted to the onion key of the OR (call him Bob). Bob
- responds with a \emph{created} cell containing the second half of the
- DH handshake, along with a hash of the negotiated key $K=g^{xy}$.
- Once the circuit has been established, Alice and Bob can send one
- another relay cells encrypted with the negotiated
- key.\footnote{Actually, the negotiated key is used to derive two
- symmetric keys: one for each direction.} More detail is given in
- the next section.
- To extend the circuit further, Alice sends a \emph{relay extend} cell
- to Bob, specifying the address of the next OR (call her Carol), and
- an encrypted $g^{x_2}$ for her. Bob copies the half-handshake into a
- \emph{create} cell, and passes it to Carol to extend the circuit.
- (Bob chooses a new circID $C_{BC}$ not currently used on the connection
- between him and Carol. Alice never needs to know this circID; only Bob
- associates $C_{AB}$ on his connection with Alice to $C_{BC}$ on
- his connection with Carol.)
- When Carol responds with a \emph{created} cell, Bob wraps the payload
- into a \emph{relay extended} cell and passes it back to Alice. Now
- the circuit is extended to Carol, and Alice and Carol share a common key
- $K_2 = g^{x_2 y_2}$.
- To extend the circuit to a third node or beyond, Alice
- proceeds as above, always telling the last node in the circuit to
- extend one hop further.
- % XXX Briefly mention path selection and path length.
- This circuit-level handshake protocol achieves unilateral entity
- authentication (Alice knows she's handshaking with the OR, but
- the OR doesn't care who is opening the circuit---Alice has no key
- and is trying to remain anonymous) and unilateral key authentication
- (Alice and the OR agree on a key, and Alice knows the OR is the
- only other entity who should know it). It also achieves forward
- secrecy and key freshness. More formally, the protocol is as follows
- (where $E_{PK_{Bob}}(\cdot)$ is encryption with Bob's public key,
- $H$ is a secure hash function, and $|$ is concatenation):
- \begin{equation}
- \begin{aligned}
- \mathrm{Alice} \rightarrow \mathrm{Bob}&: E_{PK_{Bob}}(g^x) \\
- \mathrm{Bob} \rightarrow \mathrm{Alice}&: g^y, H(K | \mathrm{``handshake"}) \\
- \end{aligned}
- \end{equation}
- In the second step, Bob proves that it was he who who received $g^x$,
- and who came up with $y$. We use PK encryption in the first step
- (rather than, say, using the first two steps of STS, which has a
- signature in the second step) because a single cell is too small to
- hold both a public key and a signature. Preliminary analysis with the
- NRL protocol analyzer \cite{meadows96} shows the above protocol to be
- secure (including providing perfect forward secrecy) under the
- traditional Dolev-Yao model.
- \subsubsection{Relay cells}
- Once Alice has established the circuit (so she shares keys with each
- OR on the circuit), she can send relay cells. Recall that every relay
- cell has a stream ID in the relay header that indicates to which
- stream the cell belongs. This stream ID allows a relay cell to be
- addressed to any of the ORs on the circuit. Upon receiving a relay
- cell, an OR looks up the corresponding circuit, and decrypts the relay
- header and payload with the appropriate session key for that circuit.
- If the cell is headed downstream (away from Alice) it then checks
- whether the decrypted stream ID is recognized---either because it
- corresponds to an open stream at this OR for the circuit, or because
- it is equal to the control stream ID zero. If the OR recognizes the
- stream ID, it accepts the relay cell and processes it as described
- below. Otherwise, the relay cell must be intended for another OR on
- the circuit. In this case, the OR looks up the circID and OR for the
- next step in the circuit, replaces the circID as appropriate, and
- sends the decrypted relay cell to the next OR. (If the OR at the end
- of the circuit receives an unrecognized relay cell, an error has
- occurred, and the cell is discarded.)
- OPs treat incoming relay cells similarly: they iteratively unwrap the
- relay header and payload with each of the session keys shared with the
- ORs on the circuit, from the closest to farthest. (Because we use a
- stream cipher, encryption operations may be inverted in any order.)
- If at any stage the OP recognizes the stream ID, the cell must have
- originated at the OR whose encryption has just been removed.
- To construct a relay cell addressed to a given OR, Alice iteratively
- encrypts the cell payload (that is, the relay header and payload) with
- the symmetric key of each hop up to that OR. Because the stream ID is
- encrypted to a different value at each step, only at the targeted OR
- will it have a meaningful value.\footnote{
- % XXX Should we just say that 2^56 is itself negligible?
- % XXX Assuming 4-hop circuits with 10 streams per hop, there are 33
- % XXX possible bad stream IDs before the last circuit. This still
- % XXX gives an error only once every 2 million terabytes (approx).
- There is a small probability ($\approx 2^56$ per cell, since stream
- IDs are seven bytes long) that an encrypted stream ID will
- accidentally correspond to a real stream. If such a cell were sent
- along the circuit, it would be recognized by an earlier OR than
- intended, which would then erroneously act upon the cell's encrypted
- contents. To prevent this case, when the OP notices that such a
- collision is about to occur, it sends a padding cell instead to
- advance the circuit's stream ciphers, and tries again. The chances
- of two such collisions in a row are negligible. To prevent stream
- ID collision on relay cells moving \emph{toward} Alice, ORs ignore
- stream IDs on those cells---all of them are for Alice's OP.
- % XXX But what if there is a collision _at_ Alice's OP? Nothing we
- % XXX can do. Then why do we do anything in this case? -NM
- }
- This \emph{leaky pipe} circuit topology
- allows Alice's streams to exit at different ORs on a single circuit.
- Alice may choose different exit points because of their exit policies,
- or to keep the ORs from knowing that two streams
- originate at the same person.
- When an OR later replies to Alice with a relay cell, it only needs to
- encrypt the cell's relay header and payload with the single key it
- shares with Alice, and send the cell back towards Alice along the
- circuit. Subsequent ORs add further layers of encryption as they
- relay the cell back to Alice.
- To tear down a whole circuit, Alice sends a \emph{destroy} control
- cell. Each OR in the circuit receives the \emph{destroy} cell, closes
- all open streams on that circuit, and passes a new \emph{destroy} cell
- forward. But just as circuits are built incrementally, they can also
- be torn down incrementally: Alice can instead send a \emph{relay
- truncate} cell to a single OR on the circuit. That node then sends a
- \emph{destroy} cell forward, and replies with an acknowledgment (a
- \emph{relay truncated} cell). By truncating a circuit, Alice could
- later extend it to different nodes without signaling to the first few
- nodes (or somebody observing them) that she was changing her
- circuit. Nodes in the middle of a circuit are not even aware that the
- circuit has been truncated, because they see only the encrypted relay
- cells. Similarly, if a node on the circuit goes down, the adjacent
- node can send a \emph{relay truncated} cell back to Alice. Thus the
- ``break a node and see which circuits go down'' attack is weakened.
- \SubSection{Opening and closing streams}
- \label{subsec:tcp}
- When Alice's application wants to open a TCP connection to a given
- address and port, it asks the OP (via SOCKS) to make the
- connection. The OP chooses the newest open circuit (or creates one if
- none is available), chooses a suitable OR on that circuit to be the
- exit node (usually the last node, but maybe others due to exit policy
- conflicts; see Section~\ref{subsec:exitpolicies}), chooses a new
- random stream ID for the stream, and sends a \emph{relay begin} cell
- to that exit node. The OP uses a stream ID of zero for the begin cell
- (so the OR will recognize it), and uses that stream ID, destination
- address, and port as the contents of the cell's payload. Once the
- exit node completes the connection to the remote host, it responds
- with a \emph{relay connected} cell. Upon receipt, the OP begins
- accepting data from the application's TCP stream, packaging into
- \emph{relay data} cells, and sending those cells along the circuit to
- the chosen OR.
- There's a catch to using SOCKS, however---some applications pass the
- alphanumeric hostname to the proxy, while others resolve it into an IP
- address first and then pass the IP address to the proxy. If the
- application does the DNS resolution first, Alice will thereby
- broadcast her destination to the DNS server. Common applications
- like Mozilla and SSH have this flaw.
- In the case of Mozilla, the flaw is easy to address: the filtering web
- proxy called Privoxy does the SOCKS call safely, and Mozilla talks to
- Privoxy safely. But a portable general solution, such as is needed for
- SSH, is
- an open problem. We could modify the local nameserver, but this approach
- is invasive, brittle, and not portable. We could encourage the resolver
- library to do its resolution via TCP rather than UDP, but this approach is
- hard to do right, and also has portability problems. We could provide a
- tool similar to \emph{dig} to perform a private lookup through the
- Tor network. Our current answer is to encourage the use of
- privacy-aware proxies like Privoxy wherever possible.
- Closing a Tor stream is analogous to closing a TCP stream: it uses a
- two-step handshake for normal operation, or a one-step handshake for
- errors. If one side of the stream closes abnormally, that node simply
- sends a \emph{relay teardown} cell, and tears down the stream. If one
- side of the stream closes the connection normally, that node sends a
- \emph{relay end} cell down the circuit. When the other side has sent
- back its own \emph{relay end}, the stream can be torn down. Because
- all relay cells use layered encryption, only the destination OR knows
- that a given relay cell is a request to close a stream. This two-step
- handshake allows for TCP-based applications that use half-open
- connections, such as HTTP clients that close their side of the stream
- after writing, but are still willing to read.
- \SubSection{Integrity checking on streams}
- Because the old Onion Routing design used a stream cipher, traffic was
- vulnerable to a malleability attack: even though the attacker could not
- decrypt cells, he could make changes to an encrypted
- cell to create corresponding changes to the data leaving the network.
- (Even an external adversary could do this, despite link encryption, by
- inverting bits on the wire.)
- This weakness allowed an adversary to change a padding cell to a destroy
- cell; change the destination address in a relay begin cell to the
- adversary's webserver; or change a user on an ftp connection from
- typing ``dir'' to typing ``delete~*''. Any node or external adversary
- along the circuit could introduce such corruption in a stream---if it
- knew or could guess the encrypted content.
- Tor prevents external adversaries from mounting this attack by
- using TLS on its links, which provides integrity checking.
- Addressing the insider malleability attack, however, is
- more complex.
- We could do integrity checking of the relay cells at each hop, either
- by including hashes or by using an authenticating cipher mode like EAX
- \cite{eax}, but these approaches would impose a
- message-expansion overhead at each hop, and we don't want to
- leak the path length or force a maximum path length.\footnote{
- Also note that these solutions would only check traffic coming from
- Alice; ORs would not be able to include suitable hashes for the
- intermediate hops, since the ORs on a circuit do not share session
- keys.}
- Because we have already accepted that our design is vulnerable to end-to-end
- timing attacks, we can perform integrity checking only at the edges of
- each stream, without introducing any new anonymity-breaking attacks. When Alice
- negotiates a key with a new hop, they both initialize a pair of SHA-1
- digests with a derivative of that key,
- thus beginning with randomness that only the two of them know. From
- then on they each incrementally add to the SHA-1 digests the contents of
- all relay cells they create or accept (one digest is for cells
- created; one is for cells accepted), and include with each relay cell
- the first 4 bytes of the current value of the hash of cells created.
- To be sure of removing or modifying a cell, the attacker must be able
- to either deduce the current digest state (which depends on all
- traffic between Alice and Bob, starting with their negotiated key).
- Attacks on SHA-1 where the adversary can incrementally add to a hash
- to produce a new valid hash don't work, because all hashes are
- end-to-end encrypted across the circuit. The computational overhead
- of computing the digests is minimal compared to doing the AES
- encryption performed at each hop of the circuit. We use only four
- bytes per cell to minimize overhead; the chance that an adversary will
- correctly guess a valid hash, plus the payload the current cell, is
- acceptably low, given that Alice or Bob tear down the circuit if they
- receive a bad hash.
- \SubSection{Rate limiting and fairness}
- Volunteers are generally more willing to run services that can limit
- their bandwidth usage. To accommodate them, Tor servers use a ``token
- bucket'' approach to limit the number of bytes they
- % XXX cite token bucket? -RD
- % XXX No. -RD, later, via NM.
- receive. Tokens are added to the bucket each second; when the bucket is
- full, new tokens are discarded. Each token represents permission to
- accept one byte from the network---to accept a byte, the connection
- must remove a token from the bucket. Thus if the bucket is empty, that
- connection must wait until more tokens arrive. The number of tokens we
- add enforces a long-term average rate of incoming bytes, while still
- permitting short-term bursts above the allowed bandwidth. Current bucket
- sizes are set to ten seconds' worth of traffic.
- Further, we want to avoid starving any Tor streams. Entire circuits
- could starve if we read greedily from connections and one connection
- uses all the remaining bandwidth. We solve this by dividing the number
- of tokens in the bucket by the number of connections that want to read,
- and reading at most that number of bytes from each connection. We iterate
- this procedure until the number of tokens in the bucket is under some
- threshold (currently 10KB), at which point we greedily read from connections.
- Because the Tor protocol generates roughly the same number of outgoing
- bytes as incoming bytes, it is generally sufficient in practice to rate-limit
- incoming bytes.
- % Is it? Fun attack: I send you lots of 1-byte-at-a-time TCP segments.
- % In response, you send lots of 256 byte cells. Can I use this to
- % make you exceed your outgoing bandwidth limit by a factor of 256? -NM
- % Can we resolve this by, when reading from edge connections, rounding up
- % the bytes read (wrt buckets) to the nearest multiple of 256? -RD
- % How's this? -NM
- With TCP connections, however, the correspondence is not one-to-one:
- relaying a single incoming byte can require an entire 256-byte cell.
- (If we waited too long for more bytes to fill the cell, we might stall
- the protocol while the local application waits for a response to the
- byte we never deliver.) Therefore, we treat this case as if the entire
- cell size had been read, regardless of the fullness of the cell.
- Further, inspired by Rennhard et al's design in \cite{anonnet}, a
- circuit's edges heuristically distinguish interactive streams from bulk
- streams by comparing the frequency with which they supply cells. We can
- provide good latency for interactive streams by giving them preferential
- service, while still getting good overall throughput to the bulk
- streams. Such preferential treatment presents a possible end-to-end
- attack, but an adversary who can observe both
- ends of the stream can already learn this information through timing
- attacks.
- \SubSection{Congestion control}
- \label{subsec:congestion}
- Even with bandwidth rate limiting, we still need to worry about
- congestion, either accidental or intentional. If enough users choose the
- same OR-to-OR connection for their circuits, that connection can become
- saturated. For example, an adversary could make a large HTTP PUT request
- through the onion routing network to a webserver he runs, and then
- refuse to read any of the bytes at the webserver end of the
- circuit. Without some congestion control mechanism, these bottlenecks
- can propagate back through the entire network. We describe our
- responses below.
- \subsubsection{Circuit-level throttling}
- To control a circuit's bandwidth usage, each OR keeps track of two
- windows. The \emph{packaging window} tracks how many relay data cells the OR is
- allowed to package (from outside TCP streams) for transmission back to the OP,
- and the \emph{delivery window} tracks how many relay data cells it is willing
- to deliver to TCP streams outside the network. Each window is initialized
- (say, to 1000 data cells). When a data cell is packaged or delivered,
- the appropriate window is decremented. When an OR has received enough
- data cells (currently 100), it sends a \emph{relay sendme} cell towards the OP,
- with stream ID zero. When an OR receives a \emph{relay sendme} cell with stream
- ID zero, it increments its packaging window. Either of these cells
- increments the corresponding window by 100. If the packaging window
- reaches 0, the OR stops reading from TCP connections for all streams
- on the corresponding circuit, and sends no more relay data cells until
- receiving a \emph{relay sendme} cell.
- The OP behaves identically, except that it must track a packaging window
- and a delivery window for every OR in the circuit. If a packaging window
- reaches 0, it stops reading from streams destined for that OR.
- \subsubsection{Stream-level throttling}
- The stream-level congestion control mechanism is similar to the
- circuit-level mechanism above. ORs and OPs use \emph{relay sendme} cells
- to implement end-to-end flow control for individual streams across
- circuits. Each stream begins with a packaging window (currently 500 cells),
- and increments the window by a fixed value (50) upon receiving a \emph{relay
- sendme} cell. Rather than always returning a \emph{relay sendme} cell as soon
- as enough cells have arrived, the stream-level congestion control also
- has to check whether data has been successfully flushed onto the TCP
- stream; it sends a \emph{relay sendme} only when the number of bytes pending
- to be flushed is under some threshold (currently 10 cells worth).
- Currently, non-data relay cells do not affect the windows. Thus we
- avoid potential deadlock issues, for example, arising because a stream
- can't send a \emph{relay sendme} cell when its packaging window is empty.
- % XXX Bad heading
- % XXX Incorporate this section elsewhere.
- \subsubsection{Needs more research}
- We don't need to reimplement full TCP windows (with sequence numbers,
- the ability to drop cells when we're full and retransmit later, etc),
- because the TCP streams already guarantee in-order delivery of each
- cell. But we need to investigate further the effects of the current
- parameters on throughput and latency, while also keeping privacy in mind;
- see Section~\ref{sec:maintaining-anonymity} for more discussion.
- \Section{Other design decisions}
- \SubSection{Resource management and denial-of-service}
- \label{subsec:dos}
- Providing Tor as a public service provides many opportunities for an
- attacker to mount denial-of-service attacks against the network. While
- flow control and rate limiting (discussed in
- Section~\ref{subsec:congestion}) prevent users from consuming more
- bandwidth than routers are willing to provide, opportunities remain for
- users to
- consume more network resources than their fair share, or to render the
- network unusable for other users.
- First of all, there are several CPU-consuming denial-of-service
- attacks wherein an attacker can force an OR to perform expensive
- cryptographic operations. For example, an attacker who sends a
- \emph{create} cell full of junk bytes can force an OR to perform an RSA
- decrypt. Similarly, an attacker can
- fake the start of a TLS handshake, forcing the OR to carry out its
- (comparatively expensive) half of the handshake at no real computational
- cost to the attacker.
- Several approaches exist to address these attacks. First, ORs may
- require clients to solve a puzzle \cite{puzzles-tls} while beginning new
- TLS handshakes or accepting \emph{create} cells. So long as these
- tokens are easy to verify and computationally expensive to produce, this
- approach limits the attack multiplier. Additionally, ORs may limit
- the rate at which they accept create cells and TLS connections, so that
- the computational work of processing them does not drown out the (comparatively
- inexpensive) work of symmetric cryptography needed to keep cells
- flowing. This rate limiting could, however, allow an attacker
- to slow down other users when they build new circuits.
- % What about link-to-link rate limiting?
- Attackers also have an opportunity to attack the Tor network by mounting
- attacks on its hosts and network links. Disrupting a single circuit or
- link breaks all currently open streams passing along that part of the
- circuit. Indeed, this same loss of service occurs when a router crashes
- or its operator restarts it. The current Tor design treats such attacks
- as intermittent network failures, and depends on users and applications
- to respond or recover as appropriate. A future design could use an
- end-to-end TCP-like acknowledgment protocol, so that no streams are
- lost unless the entry or exit point itself is disrupted. This solution
- would require more buffering at the network edges, however, and the
- performance and anonymity implications from this extra complexity still
- require investigation.
- \SubSection{Exit policies and abuse}
- \label{subsec:exitpolicies}
- %XXX originally, we planned to put the "users only know the hostname,
- % not the IP, but exit policies are by IP" problem here too. Worth
- % while still? -RD
- Exit abuse is a serious barrier to wide-scale Tor deployment. Anonymity
- presents would-be vandals and abusers with an opportunity to hide
- the origins of their activities. Attackers can harm the Tor network by
- implicating exit servers for their abuse. Also, applications that commonly
- use IP-based authentication (such as institutional mail or web servers)
- can be fooled by the fact that anonymous connections appear to originate
- at the exit OR.
- We stress that Tor does not enable any new class of abuse. Spammers
- and other attackers already have access to thousands of misconfigured
- systems worldwide, and the Tor network is far from the easiest way
- to launch these antisocial or illegal attacks.
- %Indeed, because of its limited
- %anonymity, Tor is probably not a good way to commit crimes.
- But because the
- onion routers can easily be mistaken for the originators of the abuse,
- and the volunteers who run them may not want to deal with the hassle of
- repeatedly explaining anonymity networks, we must block or limit attacks
- and other abuse that travel through the Tor network.
- To mitigate abuse issues, in Tor, each onion router's \emph{exit policy}
- describes to which external addresses and ports the router will permit
- stream connections. On one end of the spectrum are \emph{open exit}
- nodes that will connect anywhere. On the other end are \emph{middleman}
- nodes that only relay traffic to other Tor nodes, and \emph{private exit}
- nodes that only connect to a local host or network. Using a private
- exit (if one exists) is a more secure way for a client to connect to a
- given host or network---an external adversary cannot eavesdrop traffic
- between the private exit and the final destination, and so is less sure of
- Alice's destination and activities. Most onion routers will function as
- \emph{restricted exits} that permit connections to the world at large,
- but prevent access to certain abuse-prone addresses and services. In
- general, nodes can require a variety of forms of traffic authentication
- \cite{or-discex00}.
- %The abuse issues on closed (e.g. military) networks are different
- %from the abuse on open networks like the Internet. While these IP-based
- %access controls are still commonplace on the Internet, on closed networks,
- %nearly all participants will be honest, and end-to-end authentication
- %can be assumed for important traffic.
- Many administrators will use port restrictions to support only a
- limited set of well-known services, such as HTTP, SSH, or AIM.
- This is not a complete solution, since abuse opportunities for these
- protocols are still well known. Nonetheless, the benefits are real,
- since administrators seem used to the concept of port 80 abuse not
- coming from the machine's owner.
- A further solution may be to use proxies to clean traffic for certain
- protocols as it leaves the network. For example, much abusive HTTP
- behavior (such as exploiting buffer overflows or well-known script
- vulnerabilities) can be detected in a straightforward manner.
- Similarly, one could run automatic spam filtering software (such as
- SpamAssassin) on email exiting the OR network.
- ORs may also choose to rewrite exiting traffic in order to append
- headers or other information to indicate that the traffic has passed
- through an anonymity service. This approach is commonly used
- by email-only anonymity systems. When possible, ORs can also
- run on servers with hostnames such as {\it anonymous}, to further
- alert abuse targets to the nature of the anonymous traffic.
- A mixture of open and restricted exit nodes will allow the most
- flexibility for volunteers running servers. But while many
- middleman nodes help provide a large and robust network,
- having only a few exit nodes reduces the number of points
- an adversary needs to monitor for traffic analysis, and places a
- greater burden on the exit nodes. This tension can be seen in the
- Java Anon Proxy
- cascade model, wherein only one node in each cascade needs to handle
- abuse complaints---but an adversary only needs to observe the entry
- and exit of a cascade to perform traffic analysis on all that
- cascade's users. The Hydra model (many entries, few exits) presents a
- different compromise: only a few exit nodes are needed, but an
- adversary needs to work harder to watch all the clients; see
- Section~\ref{sec:conclusion}.
- Finally, we note that exit abuse must not be dismissed as a peripheral
- issue: when a system's public image suffers, it can reduce the number
- and diversity of that system's users, and thereby reduce the anonymity
- of the system itself. Like usability, public perception is also a
- security parameter. Sadly, preventing abuse of open exit nodes is an
- unsolved problem, and will probably remain an arms race for the
- forseeable future. The abuse problems faced by Princeton's CoDeeN
- project \cite{darkside} give us a glimpse of likely issues.
- \SubSection{Directory Servers}
- \label{subsec:dirservers}
- First-generation Onion Routing designs \cite{freedom2-arch,or-jsac98} used
- in-band network status updates: each router flooded a signed statement
- to its neighbors, which propagated it onward. But anonymizing networks
- have different security goals than typical link-state routing protocols.
- For example, delays (accidental or intentional)
- that can cause different parts of the network to have different pictures
- of link-state and topology are not only inconvenient---they give
- attackers an opportunity to exploit differences in client knowledge.
- We also worry about attacks to deceive a
- client about the router membership list, topology, or current network
- state. Such \emph{partitioning attacks} on client knowledge help an
- adversary to efficiently deploy resources
- when attacking a target.
- Tor uses a small group of redundant, well-known onion routers to
- track changes in network topology and node state, including keys and
- exit policies. Each such \emph{directory server} also acts as an HTTP
- server, so participants can fetch current network state and router
- lists (a \emph{directory}), and so other onion routers can upload
- their router descriptors. Onion routers periodically publish signed
- statements of their state to each directory server, which combines this
- state information with its own view of network liveness, and generates
- a signed description of the entire network state. Client software is
- pre-loaded with a list of the directory servers and their keys; it uses
- this information to bootstrap each client's view of the network.
- When a directory server receives a signed statement from an onion
- router, it recognizes the onion router by its identity key. Directory
- servers do not automatically advertise unrecognized ORs. (If they did,
- an adversary could take over the network by creating many servers
- \cite{sybil}.) Instead, new nodes must be approved by the directory
- server administrator before they are included. Mechanisms for automated
- node approval are an area of active research, and are discussed more
- in Section~\ref{sec:maintaining-anonymity}.
-
- Of course, a variety of attacks remain. An adversary who controls
- a directory server can track certain clients by providing different
- information---perhaps by listing only nodes under its control, or by
- informing only certain clients about a given node. Even an external
- adversary can exploit differences in client knowledge: clients who use
- a node listed on one directory server but not the others are vulnerable.
- Thus these directory servers must be synchronized and redundant.
- Directories are valid if they are signed by a threshold of the directory
- servers.
- The directory servers in Tor are modeled after those in Mixminion
- \cite{minion-design}, but our situation is easier. First, we make the
- simplifying assumption that all participants agree on the set of
- directory servers. Second, while Mixminion needs to predict node
- behavior, Tor only needs a threshold consensus of the current
- state of the network.
- Tor directory servers build a consensus directory through a simple
- four-round broadcast protocol. In round one, each server dates and
- signs its current opinion, and broadcasts it to the other directory
- servers; then in round two, each server rebroadcasts all the signed
- opinions it has received. At this point all directory servers check
- to see whether any server has signed multiple opinions in the same
- period. Such a server is either broken or cheating, so the protocol
- stops and notifies the administrators, who either remove the cheater
- or wait for the broken server to be fixed. If there are no
- discrepancies, each directory server then locally computes an algorithm
- (described below)
- on the set of opinions, resulting in a uniform shared directory. In
- round three servers sign this directory and broadcast it; and finally
- in round four the servers rebroadcast the directory and all the
- signatures. If any directory server drops out of the network, its
- signature is not included on the final directory.
- The rebroadcast steps ensure that a directory server is heard by
- either all of the other servers or none of them, even when some links
- are down (assuming that any two directory servers can talk directly or
- via a third). Broadcasts are feasible because there are relatively few
- directory servers (currently 3, but we expect as many as 9 as the network
- scales). Computing the shared directory locally is a straightforward
- threshold voting process: we include an OR if a majority of directory
- servers believe it to be good.
- To avoid attacks where a router connects to all the directory servers
- but refuses to relay traffic from other routers, the directory servers
- must build circuits and use them to anonymously test router reliability
- \cite{mix-acc}.
- Using directory servers is simpler and more flexible than flooding.
- For example, flooding complicates the analysis when we
- start experimenting with non-clique network topologies. And because
- the directories are signed, they can be cached by other onion routers.
- Thus directory servers are not a performance
- bottleneck when we have many users, and do not aid traffic analysis by
- forcing clients to periodically announce their existence to any
- central point.
- \Section{Rendezvous points and location privacy}
- \label{sec:rendezvous}
- Rendezvous points are a building block for \emph{location-hidden
- services} (also known as \emph{responder anonymity}) in the Tor
- network. Location-hidden services allow Bob to offer a TCP
- service, such as a webserver, without revealing its IP address.
- This type of anonymity protects against distributed DoS attacks:
- attackers are forced to attack the onion routing network as a whole
- rather than just Bob's IP address.
- Our design for location-hidden servers has the following goals.
- \textbf{Flood-proof:} Bob needs a way to filter incoming requests,
- so an attacker cannot flood Bob simply by sending many requests.
- \textbf{Robust:} Bob should be able to maintain a long-term pseudonymous
- identity even in the presence of router failure. Bob's service must
- not be tied to a single OR, and Bob must be able to tie his service
- to new ORs. \textbf{Smear-resistant:} if a social attacker offers a
- location-hidden service that is illegal or disreputable, it should not
- appear---even to a casual observer---that a rendezvous router is hosting
- that service. \textbf{Application-transparent:} Although we require users
- to run special software to access location-hidden servers, we must not
- require them to modify their applications.
- We provide location-hiding for Bob by allowing him to advertise
- several onion routers (his \emph{introduction points}) as contact
- points. He may do this on any robust efficient
- key-value lookup system with authenticated updates, such as a
- distributed hash table (DHT) like CFS \cite{cfs:sosp01}\footnote{
- Rather than rely on an external infrastructure, the Onion Routing network
- can run the DHT; to begin, we can run a simple lookup system on the
- directory servers.} Alice, the client, chooses an OR as her
- \emph{rendezvous point}. She connects to one of Bob's introduction
- points, informs him about her rendezvous point, and then waits for him
- to connect to the rendezvous point. This extra level of indirection
- helps Bob's introduction points avoid problems associated with serving
- unpopular files directly (for example, if Bob chooses
- an introduction point in Texas to serve anti-ranching propaganda,
- or if Bob's service tends to get attacked by network vandals).
- The extra level of indirection also allows Bob to respond to some requests
- and ignore others.
- We give an overview of the steps of a rendezvous. These steps are
- performed on behalf of Alice and Bob by their local onion proxies;
- application integration is described more fully below.
- \begin{tightlist}
- \item Bob chooses some introduction points, and advertises them on
- the DHT.
- \item Bob establishes a Tor circuit to each of his introduction points,
- and waits.
- \item Alice learns about Bob's service out of band (perhaps Bob told her,
- or she found it on a website). She retrieves the details of Bob's
- service from the DHT.
- \item Alice chooses an OR to serve as the rendezvous point (RP) for this
- transaction. She establishes a circuit to RP, and gives it a
- rendezvous cookie, which it will use to recognize Bob.
- \item Alice opens an anonymous stream to one of Bob's introduction
- points, and gives it a message (encrypted for Bob) which tells him
- about herself, her chosen RP and the rendezvous cookie, and the
- first half of an ephemeral
- key handshake. The introduction point sends the message to Bob.
- \item If Bob wants to talk to Alice, he builds a new circuit to Alice's
- RP and provides the rendezvous cookie and the second half of the DH
- handshake (along with a hash of the session key they now share).
- \item The RP connects Alice's circuit to Bob's. Note that RP can't
- recognize Alice, Bob, or the data they transmit.
- \item Alice now sends a \emph{relay begin} cell along the circuit. It
- arrives at Bob's onion proxy. Bob's onion proxy connects to Bob's
- webserver.
- \item An anonymous stream has been established, and Alice and Bob
- communicate as normal.
- \end{tightlist}
- When establishing an introduction point, Bob provides the onion router
- with a public ``introduction'' key. The hash of this public key
- identifies a unique service, and (since Bob is required to sign his
- messages) prevents anybody else from usurping Bob's introduction point
- in the future. Bob uses the same public key when establishing the other
- introduction points for that service. Bob periodically refreshes his
- entry in the DHT.
- The message that Alice gives
- the introduction point includes a hash of Bob's public key to identify
- the service, along with an optional initial authentication token (the
- introduction point can do prescreening, for example to block replays). Her
- message to Bob may include an end-to-end authentication token so Bob
- can choose whether to respond.
- The authentication tokens can be used to provide selective access:
- important users get tokens to ensure uninterrupted access to the
- service. During normal situations, Bob's service might simply be offered
- directly from mirrors, and Bob gives out tokens to high-priority users. If
- the mirrors are knocked down by distributed DoS attacks or even
- physical attack, those users can switch to accessing Bob's service via
- the Tor rendezvous system.
- Since Bob's introduction points might themselves be subject to DoS he
- could be faced with a choice between keeping many
- introduction connections open or risking such an attack. In this case,
- similar to the authentication tokens, he can provide selected users
- with a current list and/or future schedule of introduction points that
- are not advertised in the DHT\@. This is most likely to be practical
- if there is a relatively stable and large group of introduction points
- generally available. Alternatively, Bob could give secret public keys
- to selected users for consulting the DHT\@. All of these approaches
- have the advantage of limiting the damage that can be done even if
- some of the selected high-priority users collude in the DoS\@.
- \SubSection{Integration with user applications}
- Bob configures his onion proxy to know the local IP address and port of his
- service, a strategy for authorizing clients, and a public key. Bob
- publishes the public key, an expiration time (``not valid after''), and
- the current introduction points for his service into the DHT, all indexed
- by the hash of the public key. Note that Bob's webserver is unmodified,
- and doesn't even know that it's hidden behind the Tor network.
- Alice's applications also work unchanged---her client interface
- remains a SOCKS proxy. We encode all of the necessary information
- into the fully qualified domain name Alice uses when establishing her
- connection. Location-hidden services use a virtual top level domain
- called `.onion': thus hostnames take the form x.y.onion where x is the
- authentication cookie, and y encodes the hash of PK. Alice's onion proxy
- examines addresses; if they're destined for a hidden server, it decodes
- the PK and starts the rendezvous as described in the table above.
- \subsection{Previous rendezvous work}
- Rendezvous points in low-latency anonymity systems were first
- described for use in ISDN telephony \cite{isdn-mixes,jerichow-jsac98}.
- Later low-latency designs used rendezvous points for hiding location
- of mobile phones and low-power location trackers
- \cite{federrath-ih96,reed-protocols97}. Rendezvous for low-latency
- Internet connections was suggested in early Onion Routing work
- \cite{or-ih96}; however, the first published design of rendezvous
- points for low-latency Internet connections was by Ian Goldberg
- \cite{ian-thesis}. His design differs from
- ours in three ways. First, Goldberg suggests that Alice should manually
- hunt down a current location of the service via Gnutella; whereas our
- use of CFS makes lookup faster, more robust, and transparent to the
- user. Second, in Tor the client and server negotiate ephemeral keys
- via Diffie-Hellman, so plaintext is not exposed at any point. Third,
- our design tries to minimize the exposure associated with running the
- service, to encourage volunteers to offer introduction and rendezvous
- point services. Tor's introduction points do not output any bytes to the
- clients, and the rendezvous points don't know the client or the server,
- and can't read the data being transmitted. The indirection scheme is
- also designed to include authentication/authorization---if Alice doesn't
- include the right cookie with her request for service, Bob need not even
- acknowledge his existence.
- \Section{Attacks and Defenses}
- \label{sec:attacks}
- % XXX In sec4 we should talk about bandwidth classes, which will
- % enable us to accept a lot more ORs than if we continue to
- % require 10mbit connections for all ORs. -RD
- % XXX In sec9, we should note that we are currently
- % working with the designers of MorphMix to render our two systems
- % interoperable. So far, this seems to be relatively straightforward.
- % Interoperability will allow testing and direct comparison of the two
- % rather different designs.
-
- Below we summarize a variety of attacks, and discuss how well our
- design withstands them.
- \subsubsection*{Passive attacks}
- \emph{Observing user traffic patterns.} Observations of connection
- between a user and her first onion router will not reveal to whom
- the user is connecting or what information is being sent. It will
- reveal patterns of user traffic (both sent and received). Simple
- profiling of user connection patterns is not generally possible,
- however, because multiple application streams may be operating
- simultaneously or in series over a single circuit. Thus, further
- processing is necessary to discern even these usage patterns.
-
- \emph{Observing user content.} At the user end, content is
- encrypted; however, connections from the network to arbitrary
- websites may not be. Further, a responding website may itself be
- hostile. Filtering content is not a primary goal of
- Onion Routing; nonetheless, Tor can directly make use of Privoxy and
- related filtering services to anonymize application data streams.
- \emph{Option distinguishability.} Configuration options can be a
- source of distinguishable patterns. In general there is economic
- incentive to allow preferential services \cite{econymics}, and some
- degree of configuration choice can attract users, which
- provide anonymity. So far, however, we have
- not found a compelling use case in Tor for any client-configurable
- options. Thus, clients are currently distinguishable only by their
- behavior.
- %XXX Actually, circuitrebuildperiod is such an option. -RD
-
- \emph{End-to-end Timing correlation.} Tor only minimally hides
- end-to-end timing correlations. An attacker watching patterns of
- traffic at the initiator and the responder will be
- able to confirm the correspondence with high probability. The
- greatest protection currently available against such confirmation is to hide
- the connection between the onion proxy and the first Tor node,
- by running the onion proxy locally or
- behind a firewall. This approach
- requires an observer to separate traffic originating at the onion
- router from traffic passing through it; but because we do not mix
- or pad, this does not provide much defense.
-
- \emph{End-to-end Size correlation.} Simple packet counting
- without timing correlation will also be effective in confirming
- endpoints of a stream. However, even without padding, we have some
- limited protection: the leaky pipe topology means different numbers
- of packets may enter one end of a circuit than exit at the other.
-
- \emph{Website fingerprinting.} All the above passive
- attacks that are at all effective are traffic confirmation attacks.
- This puts them outside our general design goals. There is also
- a passive traffic analysis attack that is potentially effective.
- Rather than searching exit connections for timing and volume
- correlations, the adversary may build up a database of
- ``fingerprints'' containing file sizes and access patterns for many
- interesting websites. He can confirm a user's connection to a given
- site simply by consulting the database. This attack has
- been shown to be effective against SafeWeb \cite{hintz-pet02}. But
- Tor is not as vulnerable as SafeWeb to this attack: there is the
- possibility that multiple streams are exiting the circuit at
- different places concurrently. Also, fingerprinting will be limited to
- the granularity of cells, currently 256 bytes. Other defenses include
- larger cell sizes and/or minimal padding schemes that group websites
- into large sets. But this remains an open problem. Link
- padding or long-range dummies may also make fingerprints harder to
- detect.\footnote{Note that
- such fingerprinting should not be confused with the latency attacks
- of \cite{back01}. Those require a fingerprint of the latencies of
- all circuits through the network, combined with those from the
- network edges to the targeted user and the responder website. While
- these are in principal feasible and surprises are always possible,
- these constitute a much more complicated attack, and there is no
- current evidence of their practicality.}
- \subsubsection*{Active attacks}
- \emph{Compromise keys.}
- If a TLS session key is compromised, an attacker
- can view all the cells on TLS connection until the key is
- renegotiated. (These cells are themselves encrypted.) If a TLS
- private key is compromised, the attacker can fool others into
- thinking that he is the affected OR, but still cannot accept any
- connections. \\
- If a circuit session key is compromised, the
- attacker can unwrap a single layer of encryption from the relay
- cells traveling along that circuit. (Only nodes on the circuit can
- see these cells.) If an onion private key is compromised, the attacker
- can impersonate the OR in circuits, but only if the attacker has
- also compromised the OR's TLS private key, or is running the
- previous OR in the circuit. (This compromise affects newly created
- circuits, but because of perfect forward secrecy, the attacker
- cannot hijack old circuits without compromising their session keys.)
- In any case, periodic key rotation limits the window of opportunity
- for compromising these keys. \\
- Only by
- compromising a node's identity key can an attacker replace that
- node indefinitely, by sending new forged descriptors to the
- directory servers. Finally, an attacker who can compromise a
- directory server's identity key can influence every client's view
- of the network---but only to the degree made possible by gaining a
- vote with the rest of the the directory servers.
- \emph{Iterated compromise.} A roving adversary who can
- compromise ORs (by system intrusion, legal coersion, or extralegal
- coersion) could march down the circuit compromising the
- nodes until he reaches the end. Unless the adversary can complete
- this attack within the lifetime of the circuit, however, the ORs
- will have discarded the necessary information before the attack can
- be completed. (Thanks to the perfect forward secrecy of session
- keys, the attacker cannot force nodes to decrypt recorded
- traffic once the circuits have been closed.) Additionally, building
- circuits that cross jurisdictions can make legal coercion
- harder---this phenomenon is commonly called ``jurisdictional
- arbitrage.'' The Java Anon Proxy project recently experienced the
- need for this approach, when
- the German government successfully ordered them to add a backdoor to
- all of their nodes \cite{jap-backdoor}.
- \emph{Run a recipient.} By running a Web server, an adversary
- trivially learns the timing patterns of users connecting to it, and
- can introduce arbitrary patterns in its responses. This can greatly
- facilitate end-to-end attacks: If the adversary can induce certain
- users to connect to his webserver (perhaps by advertising
- content targeted at those users), she now holds one end of their
- connection. Additionally, there is a danger that the application
- protocols and associated programs can be induced to reveal
- information about the initiator. Tor does not aim to solve this problem;
- we depend on Privoxy and similar protocol cleaners.
-
- \emph{Run an onion proxy.} It is expected that end users will
- nearly always run their own local onion proxy. However, in some
- settings, it may be necessary for the proxy to run
- remotely---typically, in an institutional setting which wants
- to monitor the activity of those connecting to the proxy.
- Compromising an onion proxy means compromising all future connections
- through it.
- \emph{DoS non-observed nodes.} An observer who can observe some
- of the Tor network can increase the value of this traffic analysis
- by attacking non-observed nodes to shut them down, reduce
- their reliability, or persuade users that they are not trustworthy.
- The best defense here is robustness.
-
- \emph{Run a hostile node.} In addition to the abilities of a
- local observer, an isolated hostile node can create circuits through
- itself, or alter traffic patterns, to affect traffic at
- other nodes. Its ability to directly DoS a neighbor is now limited
- by bandwidth throttling. Nonetheless, in order to compromise the
- anonymity of the endpoints of a circuit by its observations, a
- hostile node must be immediately adjacent to that endpoint.
-
- \emph{Run multiple hostile nodes.} If an adversary is able to
- run multiple ORs, and is able to persuade the directory servers
- that those ORs are trustworthy and independant, then occasionally
- some user will choose one of those ORs for the start and another
- as the end of a circuit. When this happens, the user's
- anonymity is compromised for those streams. If an adversary can
- control $m$ out of $N$ nodes, he should be able to correlate at most
- $\left(\frac{m}{N}\right)^2$ of the traffic in this way---although an
- adversary
- could possibly attract a disproportionately large amount of traffic
- by running an exit node with an unusually permissive exit policy.
- \emph{Compromise entire path.} Anyone compromising both
- endpoints of a circuit can confirm this with high probability. If
- the entire path is compromised, this becomes a certainty; however,
- the added benefit to the adversary of such an attack is small in
- relation to the difficulty.
- \emph{Run a hostile directory server.} Directory servers control
- admission to the network. However, because the network directory
- must be signed by a majority of servers, the threat of a single
- hostile server is minimized.
-
- \emph{Selectively DoS a Tor node.} As noted, neighbors are
- bandwidth limited; however, it is possible to open up sufficient
- circuits that converge at a single onion router to
- overwhelm its network connection, its ability to process new
- circuits, or both.
- % We aim to address something like this attack with our congestion
- % control algorithm.
- \emph{Introduce timing into messages.} This is simply a stronger
- version of passive timing attacks already discussed above.
-
- \emph{Tagging attacks.} A hostile node could ``tag'' a
- cell by altering it. This would render it unreadable, but if the
- stream is, for example, an unencrypted request to a Web site,
- the garbled content coming out at the appropriate time could confirm
- the association. However, integrity checks on cells prevent
- this attack.
- \emph{Replace contents of unauthenticated protocols.} When
- relaying an unauthenticated protocol like HTTP, a hostile exit node
- can impersonate the target server. Thus, whenever possible, clients
- should prefer protocols with end-to-end authentication.
- \emph{Replay attacks.} Some anonymity protocols are vulnerable
- to replay attacks. Tor is not; replaying one side of a handshake
- will result in a different negotiated session key, and so the rest
- of the recorded session can't be used.
- \emph{Smear attacks.} An attacker could use the Tor network to
- engage in socially dissapproved acts, so as to try to bring the
- entire network into disrepute and get its operators to shut it down.
- Exit policies can help reduce the possibilities for abuse, but
- ultimately, the network will require volunteers who can tolerate
- some political heat.
- \emph{Distribute hostile code.} An attacker could trick users
- into running subverted Tor software that did not, in fact, anonymize
- their connections---or worse, trick ORs into running weakened
- software that provided users with less anonymity. We address this
- problem (but do not solve it completely) by signing all Tor releases
- with an official public key, and including an entry in the directory
- describing which versions are currently believed to be secure. To
- prevent an attacker from subverting the official release itself
- (through threats, bribery, or insider attacks), we provide all
- releases in source code form, encourage source audits, and
- frequently warn our users never to trust any software (even from
- us!) that comes without source.
- \subsubsection*{Directory attacks}
- \emph{Destroy directory servers.} If a few directory
- servers drop out of operation, the others still arrive at a final
- directory. So long as any directory servers remain in operation,
- they will still broadcast their views of the network and generate a
- consensus directory. (If more than half are destroyed, this
- directory will not, however, have enough signatures for clients to
- use it automatically; human intervention will be necessary for
- clients to decide whether to trust the resulting directory, or continue
- to use the old valid one.)
- \emph{Subvert a directory server.} By taking over a directory
- server, an attacker can influence (but not control) the final
- directory. Since ORs are included or excluded by majority vote,
- the corrupt directory can at worst cast a tie-breaking vote to
- decide whether to include marginal ORs. How often such marginal
- cases will occur in practice, however, remains to be seen.
- \emph{Subvert a majority of directory servers.} If the
- adversary controls more than half of the directory servers, he can
- decide on a final directory, and thus can include as many
- compromised ORs in the final directory as he wishes. Other than
- trying to ensure that directory server operators are truly
- independent and resistant to attack, Tor does not address this
- possibility.
- \emph{Encourage directory server dissent.} The directory
- agreement protocol requires that directory server operators agree on
- the list of directory servers. An adversary who can persuade some
- of the directory server operators to distrust one another could
- split the quorum into mutually hostile camps, thus partitioning
- users based on which directory they used. Tor does not address
- this attack.
- \emph{Trick the directory servers into listing a hostile OR.}
- Our threat model explicitly assumes directory server operators will
- be able to filter out most hostile ORs. If this is not true, an
- attacker can flood the directory with compromised servers.
- \emph{Convince the directories that a malfunctioning OR is
- working.} In the current Tor implementation, directory servers
- assume that if they can start a TLS connection to an an OR, that OR
- must be running correctly. It would be easy for a hostile OR to
- subvert this test by only accepting TLS connections from ORs, and
- ignoring all cells. Thus, directory servers must actively test ORs
- by building circuits and streams as appropriate. The benefits and
- hazards of a similar approach are discussed in \cite{mix-acc}.
-
- \subsubsection*{Attacks against rendezvous points}
- \emph{Make many introduction requests.} An attacker could
- attempt to deny Bob service by flooding his Introduction Point with
- requests. Because the introduction point can block requests that
- lack authentication tokens, however, Bob can restrict the volume of
- requests he receives, or require a certain amount of computation for
- every request he receives.
-
- \emph{Attack an introduction point.} An attacker could try to
- disrupt a location-hidden service by disabling its introduction
- point. But because a service's identity is attached to its public
- key, not its introduction point, the service can simply re-advertise
- itself at a different introduction point.
- \emph{Attack multiple introduction points.} If an attacker is
- able to disable all of the introduction points for a given service,
- he can block access to the service. However, re-advertisement of
- introduction points can still be done secretly so that only
- high-priority clients know the address of the service's introduction
- points. These selective secret authorizations can also be issued
- during normal operation. Thus an attacker must disable
- all possible introduction points.
- \emph{Compromise an introduction point.} If an attacker controls
- an introduction point for a service, it can flood the service with
- introduction requests, or prevent valid introduction requests from
- reaching the hidden server. The server will notice a flooding
- attempt if it receives many introduction requests. To notice
- blocking of valid requests, however, the hidden server should
- periodically test the introduction point by sending its introduction
- requests, and making sure it receives them.
- \emph{Compromise a rendezvous point.} Controlling a rendezvous
- point gains an attacker no more than controlling any other OR along
- a circuit, since all data passing along the rendezvous is protected
- by the session key shared by the client and server.
- \Section{Open Questions in Low-latency Anonymity}
- \label{sec:maintaining-anonymity}
-
- % There must be a better intro than this! -NM
- In addition to the open problems discussed in
- Section~\ref{subsec:non-goals}, many other questions remain to be
- solved by future research before we can be confident that we
- have built a secure low-latency anonymity service.
- Many of these open issues are questions of balance. For example,
- how often should users rotate to fresh circuits? Too-frequent
- rotation is inefficient, expensive, and may lead to intersection attacks,
- but too-infrequent rotation
- makes the user's traffic linkable. Instead of opening a fresh
- circuit; clients can also limit linkability by exiting from a middle point
- of the circuit, or by truncating and re-extending the circuit, but
- more analysis is needed to determine the proper trade-off.
- %[XXX mention predecessor attacks?]
- A similar question surrounds timing of directory operations:
- how often should directories be updated? With too-infrequent
- updates clients receive an inaccurate picture of the network; with
- too-frequent updates the directory servers are overloaded.
- %do different exit policies at different exit nodes trash anonymity sets,
- %or not mess with them much?
- %
- %% Why would they? By routing traffic to certain nodes preferentially?
- %[XXX Choosing paths and path lengths: I'm not writing this bit till
- % Arma's pathselection stuff is in. -NM]
- %%%% Roger said that he'd put a path selection paragraph into section
- %%%% 4 that would replace this.
- %
- %I probably should have noted that this means loops will be on at least
- %five hop routes, which should be rare given the distribution. I'm
- %realizing that this is reproducing some of the thought that led to a
- %default of five hops in the original onion routing design. There were
- %some different assumptions, which I won't spell out now. Note that
- %enclave level protections really change these assumptions. If most
- %circuits are just two hops, then just a single link observer will be
- %able to tell that two enclaves are communicating with high probability.
- %So, it would seem that enclaves should have a four node minimum circuit
- %to prevent trivial circuit insider identification of the whole circuit,
- %and three hop minimum for circuits from an enclave to some nonclave
- %responder. But then... we would have to make everyone obey these rules
- %or a node that through timing inferred it was on a four hop circuit
- %would know that it was probably carrying enclave to enclave traffic.
- %Which... if there were even a moderate number of bad nodes in the
- %network would make it advantageous to break the connection to conduct
- %a reformation intersection attack. Ahhh! I gotta stop thinking
- %about this and work on the paper some before the family wakes up.
- %On Sat, Oct 25, 2003 at 06:57:12AM -0400, Paul Syverson wrote:
- %> Which... if there were even a moderate number of bad nodes in the
- %> network would make it advantageous to break the connection to conduct
- %> a reformation intersection attack. Ahhh! I gotta stop thinking
- %> about this and work on the paper some before the family wakes up.
- %This is the sort of issue that should go in the 'maintaining anonymity
- %with tor' section towards the end. :)
- %Email from between roger and me to beginning of section above. Fix and move.
- Throughout this paper, we have assumed that end-to-end traffic
- confirmation will immediately and automatically defeat a low-latency
- anonymity system. Even high-latency anonymity
- systems can be vulnerable to end-to-end traffic confirmation, if the
- traffic volumes are high enough, and if users' habits are sufficiently
- distinct \cite{limits-open,statistical-disclosure}. \emph{Can
- anything be done to make low-latency systems resist these attacks as
- well as high-latency systems?}
- Tor already makes some effort to conceal the starts and
- ends of streams by wrapping all long-range control commands in
- identical-looking relay cells, but more analysis is needed. Link
- padding could frustrate passive observers who count packets; long-range
- padding could work against observers who own the first hop in a
- circuit. But more research needs to be done in order to find an
- efficient and practical approach. Volunteers prefer not to run
- constant-bandwidth padding; but more sophisticated traffic shaping
- approaches remain somewhat unanalyzed.
- %[XXX is this so?]
- Recent work
- on long-range padding \cite{defensive-dropping} shows promise. One
- could also try to reduce correlation in packet timing by batching and
- re-ordering packets, but it is unclear whether this could improve
- anonymity without introducing so much latency as to render the
- network unusable.
- Even if passive timing attacks were wholly solved, active timing
- attacks would remain. \emph{What can
- be done to address attackers who can introduce timing patterns into
- a user's traffic?} % [XXX mention likely approaches]
- %%% I think we cover this by framing the problem as ``Can we make
- %%% end-to-end characteristics of low-latency systems as good as
- %%% those of high-latency systems?'' Eliminating long-term
- %%% intersection is a hard problem.
- %
- %Even regardless of link padding from Alice to the cloud, there will be
- %times when Alice is simply not online. Link padding, at the edges or
- %inside the cloud, does not help for this.
- In order to scale to many users, and to prevent an
- attacker from observing the whole network at once, it may be necessary
- for low-latency anonymity systems to support far more servers than Tor
- currently anticipates. This introduces several issues. First, if
- approval by a centralized set of directory servers is no longer
- feasible, what mechanism should be used to prevent adversaries from
- signing up many spurious servers?
- Second, if clients can no longer have a complete
- picture of the network at all times, how can they perform
- discovery while preventing attackers from manipulating or exploiting
- gaps in client knowledge? Third, if there are too many servers
- for every server to constantly communicate with every other, what kind
- of non-clique topology should the network use? Restricted-route
- topologies promise comparable anonymity with better scalability
- \cite{danezis-pets03}, but whatever topology we choose, we need some
- way to keep attackers from manipulating their position within it.
- Fourth, since no centralized authority is tracking server reliability,
- How do we prevent unreliable servers from rendering the network
- unusable? Fifth, do clients receive so much anonymity benefit from
- running their own servers that we should expect them all to do so, or
- do we need to find another incentive structure to motivate them?
- (Tarzan and MorphMix present possible solutions.)
- % [[ XXX how to approve new nodes (advogato, sybil, captcha (RTT));]
- Alternatively, it may be the case that one of these problems proves
- intractable, or that the drawbacks to many-server systems prove
- greater than the benefits. Nevertheless, we may still do well to
- consider non-clique topologies. A cascade topology may provide more
- defense against traffic confirmation.
- % XXX Why would it? Cite. -NM
- %
- % Huh? Do you mean for simple attacks just because of larger anonymity
- % sets? -PS
- Does the hydra topology (many input nodes, few output nodes) work
- better? Are we going to get a hydra anyway because most nodes will be
- middleman nodes?
- As mentioned in Section~\ref{subsec:dos}, Tor could improve its
- robustness against node failure by buffering transmitted stream data
- at the network's edges until the data has been acknowledged by the
- other end of the stream. The efficacy of this approach remains to be
- tested, however, and there may be more effective means for ensuring
- reliable connections in the presence of unreliable nodes.
- %%% Keeping this original paragraph for a little while, since it
- %%% is not the same as what's written there now.
- %
- %Because Tor depends on TLS and TCP to provide a reliable transport,
- %when one of the servers goes down, all the circuits (and thus streams)
- %traveling over that server must break. This reduces anonymity because
- %everybody needs to reconnect right then (does it? how much?) and
- %because exit connections all break at the same time, and it also harms
- %usability. It seems the problem is even worse in a peer-to-peer
- %environment, because so far such systems don't really provide an
- %incentive for nodes to stay connected when they're done browsing, so
- %we would expect a much higher churn rate than for onion routing.
- %there ways of allowing streams to survive the loss of a node in the
- %path?
- % Roger or Paul suggested that we say something about incentives,
- % too, but I think that's a better candidate for our future work
- % section. After all, we will doubtlessly learn very much about why
- % people do or don't run and use Tor in the near future. -NM
- %We should run a squid at each exit node, to provide comparable anonymity
- %to private exit nodes for cache hits, to speed everything up, and to
- %have a buffer for funny stuff coming out of port 80.
- % on the other hand, it hampers PFS, because ORs have pages in the cache.
- %I previously elsewhere suggested bulk transfer proxies to carve
- %up big things so that they could be downloaded in less noticeable
- %pieces over several normal looking connections. We could suggest
- %similarly one or a handful of squid nodes that might serve up
- %some of the more sensitive but common material, especially if
- %the relevant sites didn't want to or couldn't run their own OR.
- %This would be better than having everyone run a squid which would
- %just help identify after the fact the different history of that
- %node's activity. All this kind of speculation needs to move to
- %future work section I guess. -PS]
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \Section{Future Directions}
- \label{sec:conclusion}
- Tor brings together many innovations into
- a unified deployable system. But there are still several attacks that
- work quite well, as well as a number of sustainability and run-time
- issues remaining to be ironed out. In particular:
- % Many of these (Scalability, cover traffic, morphmix)
- % are duplicates from open problems.
- %
- \emph{Scalability:} Tor's emphasis on design simplicity and
- deployability has led us to adopt a clique topology, a
- semi-centralized model for directories and trusts, and a
- full-network-visibility model for client knowledge. None of these
- properties will scale to more than a few hundred servers, at most.
- Promising approaches to better scalability exist (see
- Section~\ref{sec:maintaining-anonymity}), but more deployment
- experience would be helpful in learning the relative importance of
- these bottlenecks.
- \emph{Cover traffic:} Currently we avoid cover traffic because
- of its clear costs in performance and bandwidth, and because its
- security benefits are not well understood. With more research
- \cite{SS03,defensive-dropping}, the price/value ratio may change,
- both for link-level cover traffic and also long-range cover traffic.
- \emph{Better directory distribution:} Even with the threshold
- directory agreement algorithm described in Section~\ref{subsec:dirservers},
- the directory servers are still trust bottlenecks. We must find more
- decentralized yet practical ways to distribute up-to-date snapshots of
- network status without introducing new attacks. Also, directory
- retrieval presents a scaling problem, since clients currently
- download a description of the entire network state every 15
- minutes. As the state grows larger and clients more numerous, we
- may need to move to a solution in which clients only receive
- incremental updates to directory state, or where directories are
- cached at the ORs to avoid high loads on the directory servers.
- % XXX this is a design paper, not an implementation paper. the design
- % says that they're already cached at the ORs. Agree/disagree?
- % XXX Agree. -NM
- \emph{Implementing location-hidden servers:} While
- Section~\ref{sec:rendezvous} describes a design for rendezvous
- points and location-hidden servers, these features have not yet been
- implemented. While doing so we are likely to encounter additional
- issues that must be resolved, both in terms of usability and anonymity.
- \emph{Further specification review:} Although we have a public,
- byte-level specification for the Tor protocols, this protocol has
- not received extensive external review. We hope that as Tor
- becomes more widely deployed, more people will become interested in
- examining our specification.
- \emph{Wider-scale deployment:} The original goal of Tor was to
- gain experience in deploying an anonymizing overlay network, and
- learn from having actual users. We are now at the point in design
- and development where we can start deploying a wider network. Once
- we have many actual users, we will doubtlessly be better
- able to evaluate some of our design decisions, including our
- robustness/latency trade-offs, our performance trade-offs (including
- cell size), our abuse-prevention mechanisms, and
- our overall usability.
- % XXX large and small cells on same network.
- % XXX work with morphmix spec
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %% commented out for anonymous submission
- %\Section{Acknowledgments}
- % Peter Palfrader, Geoff Goodell, Adam Shostack, Joseph Sokol-Margolis,
- % John Bashinski
- % for editing and comments
- % Matej Pfajfar, Andrei Serjantov, Marc Rennhard for design discussions
- % Bram Cohen for congestion control discussions
- % Adam Back for suggesting telescoping circuits
- % Cathy Meadows for formal analysis of candidate extend DH protocols
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \bibliographystyle{latex8}
- \bibliography{tor-design}
- \end{document}
- % Style guide:
- % U.S. spelling
- % avoid contractions (it's, can't, etc.)
- % prefer ``for example'' or ``such as'' to e.g.
- % prefer ``that is'' to i.e.
- % 'mix', 'mixes' (as noun)
- % 'mix-net'
- % 'mix', 'mixing' (as verb)
- % 'middleman' [Not with a hyphen; the hyphen has been optional
- % since Middle English.]
- % 'nymserver'
- % 'Cypherpunk', 'Cypherpunks', 'Cypherpunk remailer'
- % 'Onion Routing design', 'onion router' [note capitalization]
- % 'SOCKS'
- % Try not to use \cite as a noun.
- % 'Authorizating' sounds great, but it isn't a word.
- % 'First, second, third', not 'Firstly, secondly, thirdly'.
- % 'circuit', not 'channel'
- % Typography: no space on either side of an em dash---ever.
- % Hyphens are for multi-part words; en dashs imply movement or
- % opposition (The Alice--Bob connection); and em dashes are
- % for punctuation---like that.
- % A relay cell; a control cell; a \emph{create} cell; a
- % \emph{relay truncated} cell. Never ``a \emph{relay truncated}.''
- %
- % 'Substitute ``Damn'' every time you're inclined to write ``very;'' your
- % editor will delete it and the writing will be just as it should be.'
- % -- Mark Twain
|