\documentclass{llncs}

\usepackage{url}
\usepackage{amsmath}
\usepackage{epsfig}

\newenvironment{tightlist}{\begin{list}{$\bullet$}{
  \setlength{\itemsep}{0mm}
    \setlength{\parsep}{0mm}
    %  \setlength{\labelsep}{0mm}
    %  \setlength{\labelwidth}{0mm}
    %  \setlength{\topsep}{0mm}
    }}{\end{list}}

\begin{document}

\title{Challenges in deploying low-latency anonymity (DRAFT)}

\author{Roger Dingledine and Nick Mathewson}
\institute{The Free Haven Project\\
\email{\{arma,nickm\}@freehaven.net}}

\maketitle
\pagestyle{empty}

\begin{abstract}
  We describe our experiences with deploying Tor, a low-latency
  anonymous general purpose communication system that has been funded
  by the U.S.~Navy, DARPA, and the Electronic Frontier Foundation.
The basic Tor design supports most applications that run over TCP (those
that are SOCKS compliant).
%Because of its simplified threat model, Tor does not aim to defend
%against many of the attacks in the literature.
We describe both policy issues that have come up from operating the
network and technical challenges to building a more sustainable and
scalable network.
\end{abstract}

\section{Introduction}

Anonymous communication is full of surprises. In this paper we will
tell you about some of them. We will describe the challenges arising
from our experiences with deploying Tor, a low-latency anonymous general
purpose communication system. We will discuss some of the difficulties
we have experienced, how we have met them or, when we have some idea,
how we plan to meet them. We will also discuss some tough open
problems that have not given us any trouble in our current deployment.
We will describe both those future challenges that we intend to explore and
those that we have decided not to explore, and why.

Tor is an overlay network, designed
to be practical and usable, for protecting TCP streams over the
Internet~\cite{tor-design}. We have been operating a publicly deployed
Tor network since October 2003 that has grown to over a hundred volunteer
nodes and sometimes as much as 80 megabits of average traffic per second.

Tor has a weaker threat model than many anonymity designs in the
literature, because our foremost goal is to deploy a
practical and useful network for interactive (low-latency) communications.
Subject to this restriction, we try to
provide as much anonymity as we can.
In particular, because we
support interactive communications without impractically expensive padding,
we fall prey to a variety
of intra-network~\cite{back01,attack-tor-oak05,flow-correlation04} and
end-to-end~\cite{danezis-pet2004,SS03} anonymity-breaking attacks.

Users are safe so long as adversaries are unable to
observe connections as they both enter and leave the Tor network.
Therefore, Tor's defense lies in having a diverse enough set of servers
that most real-world
adversaries are unlikely to be in the right places to attack users.
Specifically,
Tor aims to resist observers and insiders by distributing each transaction
over several nodes in the network.  This ``distributed trust'' approach
means the Tor network can be safely operated and used by a wide variety
of mutually distrustful users, providing more sustainability and security
than some previous attempts at anonymizing networks.
The Tor network has a broad range of users, including ordinary citizens
concerned about their privacy, corporations
who don't want to reveal information to their competitors, and law
enforcement and government intelligence agencies who need
to do operations on the Internet without being noticed.

Tor research and development has been funded by the U.S.~Navy and DARPA
for use in securing government
communications, and by the Electronic Frontier Foundation, for use
in maintaining civil liberties for ordinary citizens online. The Tor
protocol is one of the leading choices
to be the anonymizing layer in the European Union's PRIME directive to
help maintain privacy in Europe. The University of Dresden in Germany
has integrated an independent implementation of the Tor protocol into
their popular Java Anon Proxy anonymizing client. This wide variety of
interests helps maintain both the stability and the security of the
network.

The ideal Tor network would be practical, useful, and anonymous.
When
trade-offs arise between these properties, Tor's research strategy has been
to insist on remaining useful enough to attract many users,
and practical enough to support them.  Subject to these
constraints, we aim to maximize anonymity.  This is not the only possible
direction in anonymity research: designs exist that provide more anonymity
than Tor at the expense of significantly increased resource requirements, or
decreased flexibility in application support (typically because of increased
latency).  Such research does not typically abandon aspirations towards
deployability or utility, but instead tries to maximize deployability and
utility subject to a certain degree of inherent anonymity (inherent because
usability and practicality affect usage which affects the actual anonymity
provided by the network~\cite{back01,econymics}). We believe that these
approaches can be promising and useful, but that by focusing on deploying a
usable system in the wild, Tor helps us experiment with the actual parameters
of what makes a system ``practical'' for volunteer operators and ``useful''
for home users, and helps illuminate undernoticed issues which any deployed
volunteer anonymity network will need to address.

While the Tor design paper~\cite{tor-design} gives an overall view of its
design and goals,
this paper describes the policy and technical issues that Tor faces as
we continue deployment. Rather than trying to provide complete solutions
to every problem here, we lay out the assumptions and constraints
that we have observed through deploying Tor in the wild. In doing so, we
aim to create a research agenda for others to
help in addressing these issues.
% Section~\ref{sec:what-is-tor} gives an
%overview of the Tor
%design and our goals.
%Sections~\ref{sec:crossroads-policy}
%and~\ref{sec:crossroads-design} go on to describe the practical challenges,
%both policy and technical respectively,
%that stand in the way of moving
%from a practical useful network to a practical useful anonymous network.

%\section{What Is Tor}
\section{Distributed trust: safety in numbers}
\label{sec:what-is-tor}

%Here we give a basic overview of the Tor design and its properties. For
%details on the design, assumptions, and security arguments, we refer
%the reader to the Tor design paper~\cite{tor-design}.

% XXX this section needs to mention that we have exit policies.

Tor provides \emph{forward privacy}, so that users can connect to
Internet sites without revealing their logical or physical locations
to those sites or to observers.  It also provides \emph{location-hidden
services}, so that critical servers can support authorized users without
giving adversaries an effective vector for physical or online attacks.
The design provides these protections even when a portion of its own
infrastructure is controlled by an adversary.

To create a private network pathway with Tor, the client
incrementally builds a \emph{circuit} of encrypted connections through
servers on the network. The circuit is extended one hop at a time, and
each server along the way knows only which server gave it data and which
server it is giving data to. No individual server ever knows the complete
path that a data packet has taken. The client negotiates a separate set
of encryption keys for each hop along the circuit to ensure that each
hop can't trace these connections as they pass through.
Because each server sees no more than one hop in the
circuit, neither an eavesdropper nor a compromised server can use traffic
analysis to link the connection's source and destination.

For efficiency, the Tor software uses the same circuit for connections
that happen within the same short period.
Later requests are given a new
circuit, to prevent long-term linkability between different actions by
a single user.

Tor also makes it possible for users to hide their locations while
offering various kinds of services, such as web publishing or an instant
messaging server. Using Tor ``rendezvous points'', other Tor users can
connect to these hidden services, each without knowing the other's network
identity.

Tor attempts to anonymize the transport layer, not the application layer, so
application protocols that include personally identifying information need
additional application-level scrubbing proxies, such as
Privoxy~\cite{privoxy} for HTTP.  Furthermore, Tor does not permit arbitrary
IP packets; it only anonymizes TCP and DNS, and only supports connections via
SOCKS (see Section~\ref{subsec:tcp-vs-ip}).

Tor differs from other deployed systems for traffic analysis resistance
in its security and flexibility.  Mix networks such as
Mixmaster~\cite{mixmaster-spec} or its successor Mixminion~\cite{minion-design}
gain the highest degrees of anonymity at the expense of introducing highly
variable delays, thus making them unsuitable for applications such as web
browsing.  Commercial single-hop
proxies~\cite{anonymizer} present a single point of failure, where
a single compromise can expose all users' traffic, and a single-point
eavesdropper can perform traffic analysis on the entire network.
Also, their proprietary implementations place any infrastructure that
depends on these single-hop solutions at the mercy of their providers'
financial health as well as network security.

No organization can achieve this security on its own.  If a single
corporation or government agency were to build a private network to
protect its operations, any connections entering or leaving that network
would be obviously linkable to the controlling organization.
The members
and operations of that agency would be easier, not harder, to distinguish.

Instead, to protect our networks from traffic analysis, we must
collaboratively blend the traffic from many organizations and private
citizens, so that an eavesdropper can't tell which users are which,
and who is looking for what information.  By bringing more users onto
the network, all users become more secure~\cite{econymics}.

Naturally, organizations will not want to depend on others for their
security.  If most participating providers are reliable, Tor tolerates
some hostile infiltration of the network.  For maximum protection,
the Tor design includes an enclave approach that lets data be encrypted
(and authenticated) end-to-end, so high-sensitivity users can be sure it
hasn't been read or modified.  This even works for Internet services that
don't have built-in encryption and authentication, such as unencrypted
HTTP or chat, and it requires no modification of those services.

As of January 2005, the Tor network has grown to around a hundred servers
on four continents, with a total capacity exceeding 1Gbit/s. Appendix A
shows a graph of the number of working servers over time, as well as a
graph of the number of bytes being handled by the network over time. At
this point the network is sufficiently diverse for further development
and testing; but of course we always encourage and welcome new servers
to join the network.

%Tor doesn't try to provide steg (but see Section~\ref{subsec:china}), or
%the other non-goals listed in tor-design.

Tor is not the only anonymity system that aims to be practical and useful.
Commercial single-hop proxies~\cite{anonymizer}, as well as unsecured
open proxies around the Internet, can provide good
performance and some security against a weaker attacker.
The Java
Anon Proxy~\cite{web-mix} provides similar functionality to Tor but only
handles web browsing rather than arbitrary TCP\@.
%Some peer-to-peer file-sharing overlay networks such as
%Freenet~\cite{freenet} and Mute~\cite{mute}
Zero-Knowledge Systems' commercial Freedom
network~\cite{freedom21-security} was even more flexible than Tor in
that it could transport arbitrary IP packets, and it also supported
pseudonymous access rather than just anonymous access; but it had
a different approach to sustainability (collecting money from users
and paying ISPs to run servers), and has shut down due to financial
load.  Finally, more scalable designs like Tarzan~\cite{tarzan:ccs02} and
MorphMix~\cite{morphmix:fc04} have been proposed in the literature, but
have not yet been fielded. We direct the interested reader to Section~2
of~\cite{tor-design} for a more in-depth review of related work.
%six-four. crowds. i2p.

have a serious discussion of morphmix's assumptions, since they would
seem to be the direct competition. in fact tor is a flexible architecture
that would encompass morphmix, and they're nearly identical except for
path selection and node discovery. and the trust system morphmix has
seems overkill (and/or insecure) based on the threat model we've picked.
% this para should probably move to the scalability / directory system. -RD

\section{Threat model}
\label{sec:threat-model}

Tor does not attempt to defend against a global observer.  Any adversary who
can see a user's connection to the Tor network, and who can see the
corresponding connection as it exits the Tor network, can use timing
correlation to confirm the user's chosen
communication partners.
Defeating this attack would seem to require
introducing a prohibitive degree of traffic padding between the user and the
network, or introducing an unacceptable degree of latency (but see
Section~\ref{subsec:mid-latency}).
And it is not clear that padding works at all if we assume a
minimally active adversary that modifies the timing of packets
to or from the user by sending network traffic of his own. Thus, Tor
only attempts to defend against
external observers who cannot observe both sides of a user's
connection.

Against internal attackers who sign up Tor servers, the situation is more
complicated.  In the simplest case, if an adversary has compromised $c$ of
$n$ servers on the Tor network, then the adversary will be able to compromise
a random circuit with probability $\frac{c^2}{n^2}$ (since the circuit
initiator chooses hops randomly, and the adversary must control both the
first and the last hop of the circuit).  But there are
complicating factors:
\begin{tightlist}
\item If the user continues to build random circuits over time, an adversary
  is pretty certain to see a statistical sample of the user's traffic, and
  thereby can build an increasingly accurate profile of her behavior.  (See
  Section~\ref{subsec:helper-nodes} for possible solutions.)
\item An adversary who controls a popular service outside of the Tor network
  can be certain of observing all connections to that service; he
  therefore will trace connections to that service with probability
  $\frac{c}{n}$.
\item Users do not in fact choose servers with uniform probability; they
  favor servers with high bandwidth or uptime, and exit servers that
  permit connections to their favorite services.
\end{tightlist}

%discuss $\frac{c^2}{n^2}$, except how in practice the chance of owning
%the last hop is not $c/n$ since that doesn't take the destination (website)
%into account.
%so in cases where the adversary does not also control the
%final destination we're in good shape, but if he *does* then we'd be better
%off with a system that lets each hop choose a path.
%
%Isn't it more accurate to say ``If the adversary _always_ controls the final
% dest, we would be just as well off with such a system.'' ?  If not, why
% not? -nm
% Sure. In fact, better off, since they seem to scale more easily. -rd

% the below paragraph should probably move later, and merge with
% other discussions of attack-tor-oak05.
In practice Tor's threat model is based entirely on the goal of
dispersal and diversity. Murdoch and Danezis describe an attack
\cite{attack-tor-oak05} that lets an attacker determine the nodes used
in a circuit; yet s/he cannot identify the initiator or responder,
e.g., client or web server, through this attack. So the endpoints
remain secure, which is the goal. It is conceivable that an
adversary could attack or set up observation of all connections
to an arbitrary Tor node in only a few minutes.  If such an adversary
were to exist, s/he could use this probing to remotely identify a node
for further attack.  Of more likely immediate practical concern,
an adversary with active access to the responder traffic
wants to keep a circuit alive long enough to attack an identified
node. Thus it is important to prevent the responding end of the circuit
from keeping it open indefinitely.
Also, someone could identify nodes in this way and, if the nodes are in their
jurisdiction, immediately get a subpoena (if they even need one)
telling the node operator(s) that they must retain all the active
circuit data they now have.
Further, the enclave model, which had previously looked to be the most
generally secure, seems particularly threatened by this attack, since
it identifies endpoints when they're also nodes in the Tor network:
see Section~\ref{subsec:helper-nodes} for discussion of some ways to
address this issue.

See Section~\ref{subsec:routing-zones} for discussion of larger
adversaries and our dispersal goals.

[this section will get written once the rest of the paper is farther along]

\section{Crossroads: Policy issues}
\label{sec:crossroads-policy}

Many of the issues the Tor project needs to address are not just a
matter of system design or technology development. In particular, the
Tor project's \emph{image} with respect to its users and the rest of
the Internet impacts the security it can provide.

As an example to motivate this section, some U.S.~Department of Energy
penetration testing engineers are tasked with compromising DoE computers
from the outside. They only have a limited number of ISPs from which to
launch their attacks, and they found that the defenders were recognizing
attacks because they came from the same IP space. These engineers wanted
to use Tor to hide their tracks. First, from a technical standpoint,
Tor does not support the variety of IP packets one would like to use in
such attacks (see Section~\ref{subsec:tcp-vs-ip}).
But aside from this,
we also decided that it would probably be poor precedent to encourage
such use---even legal use that improves national security---and managed
to dissuade them.

With this image issue in mind, this section discusses the Tor user base and
Tor's interaction with other services on the Internet.

\subsection{Image and security}

A growing field of papers argues that usability for anonymity systems
contributes directly to their security, because how usable the system
is impacts the possible anonymity set~\cite{back01,econymics}. Or
conversely, an unusable system attracts few users and thus can't provide
much anonymity.

This phenomenon has a second-order effect: knowing this, users should
choose which anonymity system to use based in part on how usable
\emph{others} will find it, in order to get the protection of a larger
anonymity set. Thus we might replace the adage ``usability is a security
parameter''~\cite{back01} with a new one: ``perceived usability is a
security parameter.'' From here we can better understand the effects
of publicity and advertising on security: the more convincing your
advertising, the more likely people will believe you have users, and thus
the more users you will attract. Perversely, over-hyped systems (if they
are not too broken) may be a better choice than modestly promoted ones,
if the hype attracts more users~\cite{usability-network-effect}.

So it follows that we should come up with ways to accurately communicate
the available security levels to the user, so she can make informed
decisions. JAP aims to do this by including a
comforting `anonymity meter' dial in the software's graphical interface,
giving the user an impression of the level of protection for her current
traffic.

However, there's a catch. For users to share the same anonymity set,
they need to act like each other. An attacker who can distinguish
a given user's traffic from the rest of the traffic will not be
distracted by other users on the network.
For high-latency systems like
Mixminion, where the threat model is based on mixing messages with each
other, there's an arms race between end-to-end statistical attacks and
counter-strategies~\cite{statistical-disclosure,minion-design,e2e-traffic,trickle02}.
But for low-latency systems like Tor, end-to-end \emph{traffic
correlation} attacks~\cite{danezis-pet2004,SS03,defensive-dropping}
allow an attacker who can measure both ends of a communication
to match packet timing and volume, quickly linking
the initiator to her destination. This is why Tor's threat model is
based on preventing the adversary from observing both the initiator and
the responder.

Like Tor, the current JAP implementation does not pad connections
(apart from using small fixed-size cells for transport). In fact,
its cascade-based network topology may be even more vulnerable to these
attacks, because the network has fewer edges. JAP was born out of
the ISDN mix design~\cite{isdn-mixes}, where padding made sense because
every user had a fixed bandwidth allocation, but in its current context
as a general Internet web anonymizer, adding sufficient padding to JAP
would be prohibitively expensive.\footnote{Even if they could find and
maintain extra funding to run higher-capacity nodes, our experience
suggests that many users would not accept the increased per-user
bandwidth requirements, leading to an overall much smaller user base. But
see Section~\ref{subsec:mid-latency}.} Therefore, since under this threat
model the number of concurrent users does not seem to have much impact
on the anonymity provided, we suggest that JAP's anonymity meter is not
correctly communicating security levels to its users.

% because more users don't help anonymity much, we need to rely more
% on other incentive schemes, both policy-based (see sec x) and
% technically enforced (see sec y)

On the other hand, while the number of active concurrent users may not
matter as much as we'd like, it still helps to have some other users
who use the network.
We investigate this issue in the next section.

\subsection{Reputability}

Another factor impacting the network's security is its reputability:
the perception of its social value based on its current user base. If Alice is
the only user who has ever downloaded the software, it might be socially
accepted, but she's not getting much anonymity. Add a thousand animal rights
activists, and she's anonymous, but everyone thinks she's a Bambi lover (or
NRA member if you prefer a contrasting example). Add a thousand
random citizens (cancer survivors, privacy enthusiasts, and so on)
and now she's harder to profile.

The more cancer survivors on Tor, the better for the human rights
activists. The more script kiddies, the worse for the normal users. Thus,
reputability is an anonymity issue for two reasons. First, it impacts
the sustainability of the network: a network that's always about to be
shut down has difficulty attracting and keeping users, so its anonymity
set suffers.
% XXX but we said the anonymity set doesn't matter!
Second, a disreputable network attracts the attention of
powerful attackers who may not mind revealing the identities of all the
users to uncover a few bad ones.

While people therefore have an incentive for the network to be used for
``more reputable'' activities than their own, there are still tradeoffs
involved when it comes to anonymity.
To follow the above example, a
network used entirely by cancer survivors might welcome some NRA members
onto the network, though of course they'd prefer a wider
variety of users.

Reputability becomes even more tricky in the case of privacy networks,
since the good uses of the network (such as publishing by journalists in
dangerous countries) are typically kept private, whereas network abuses
or other problems tend to be more widely publicized.

The impact of public perception on security is especially important
during the bootstrapping phase of the network, where the first few
widely publicized uses of the network can dictate the types of users it
attracts next.

%% "outside of academia, jap has just lost, permanently".  (That is,
%% even though the crime detection issues are resolved and are unlikely
%% to go down the same way again, public perception has not been kind.)

\subsection{Sustainability and incentives}

One of the unsolved problems in low-latency anonymity designs is
how to keep the servers running.  Zero-Knowledge Systems' Freedom network
depended on paying third parties to run its servers; the JAP project
depends on grants to pay for its bandwidth and
administrative expenses.  In Tor, bandwidth and administrative costs are
distributed across the volunteers who run Tor nodes, so we at least have
reason to think that the Tor network could survive without continued research
funding.\footnote{It also helps that Tor is implemented with free and open
  source software that can be maintained by anybody with the ability and
  inclination.}  But why are these volunteers running nodes, and what can we
do to encourage more volunteers to do so?

We have not surveyed Tor operators to learn why they are running servers, but
from the information they have provided, it seems that many of them run Tor
nodes for reasons of personal interest in privacy issues.
It is possible
that others are running Tor for anonymity reasons, but of course they are
hardly likely to tell us if they are.

Significantly, Tor's threat model changes the anonymity incentives for running
a server.  In a high-latency mix network, users can receive additional
anonymity by running their own server, since doing so obscures when they are
injecting messages into the network.  But in Tor, anybody observing a Tor
server can tell when the server is generating traffic that corresponds to
none of its incoming traffic.

Still, anonymity and privacy incentives do remain for server operators:
\begin{tightlist}
\item Against a hostile website, running a Tor exit node can provide a degree
  of ``deniability'' for traffic that originates at that exit node.  For
  example, it is likely in practice that HTTP requests from a Tor server's IP
  will be assumed to be from the Tor network.
\item Local Tor entry and exit servers allow users on a network to run in an
  `enclave' configuration.  [XXXX need to resolve this. They would do this
   for E2E encryption + auth?]
\end{tightlist}

First, we try to make the costs of running a Tor server easy to minimize.
Since Tor is run by volunteers, the most crucial software usability issue is
usability by operators: when an operator leaves, the network becomes less
usable by everybody.  To keep operators pleased, we must try to keep Tor's
resource and administrative demands as low as possible. [XXXX say more. E.g.,
exit policies.]

Because of ISP billing structures, many Tor operators have underused capacity
that they are willing to donate to the network, at no additional monetary
cost to them.  Features to limit bandwidth have been essential to adoption.
Also useful has been a ``hibernation'' feature that allows a server that
wants to provide high bandwidth, but no more than a certain amount in a
given billing cycle, to become dormant once its bandwidth is exhausted, and
to reawaken at a random offset into the next billing cycle.
This feature hasinteresting policy implications, however; seesection~\ref{subsec:bandwidth-and-usability} below.[XXXX say more.  Why else would you run a server? What else can we do/do we  already do to make running a server more attractive?][We can enforce incentives; see Section 6.1. We can rate-limit clients.  We can put "top bandwidth servers lists" up a la seti@home.]\subsection{Bandwidth and usability}\label{subsec:bandwidth-and-usability}Once users have configured their applications to work with Tor, the largestremaining usability issue is bandwidth.  When websites ``feel slow,'' usersbegin to suffer.Clients currently try to build their connections through servers that theyguess will have enough bandwidth.  But even if capacity is allocatedoptimally, it seems unlikely that the current network architecture will haveenough capacity to provide every user with as much bandwidth as she wouldreceive if she weren't using Tor, unless far more servers join the network(see above).Limited capacity does not destroy the network, however.  Instead, usage tendstowards an equilibrium: when performance suffers, users who value performanceover anonymity tend to leave the system, thus freeing capacity until theremaining users on the network are exactly those willing to use that capacitythere is.XXX what if the file-sharers are more persistent than the journalists?\subsection{Tor and file-sharing}%One potentially problematical area with deploying Tor has been our response%to file-sharing applications.File-sharing applications make up an enormousfraction of the traffic on the Internet today, and provide two challenges toany anonymizing network: their intensive bandwidth requirement, and thedegree to which they are associated (correctly or not) with copyrightviolation.As noted above, high-bandwidth protocols can make the network unresponsive,but tend to be somewhat self-correcting.  Issues of copyright violation,however, are more interesting.  
Typical exit node operators want to helppeople achieve private and anonymous speech, not to help people (say) hostVin Diesel movies for download; and typical ISPs would rather notdeal with customers who incur them the overhead of getting menacing lettersfrom the MPAA.  While it is quite likely that the operators are doing nothingillegal, many ISPs have policies of dropping users who get repeated legalthreats regardless of the merits of those threats, and many operators wouldprefer to avoid receiving legal threats even if those threats have littlemerit.  So when the letters arrive, operators are likely to facepressure to block filesharing applications entirely, in order to avoid thehassle.But blocking filesharing would not necessarily be easy; most popularprotocols have evolved to run on a variety of non-standard ports in order toget around other port-based bans.  Thus, exit node operators who wanted toblock filesharing would have to find some way to integrate Tor with aprotocol-aware exit filter.  This could be a technically expensiveundertaking, and one with poor prospects: it is unlikely that Tor exit nodeswould succeed where so many institutional firewalls have failed.  Anotherpossibility for sensitive operators is to run a restrictive server thatonly permits exit connections to a restricted range of ports which arenot frequently associated with file sharing.  There are increasingly few suchports.For the moment, it seems that Tor's bandwidth issues have rendered itunattractive for bulk file-sharing traffic; this may continue to be so in thefuture.  Nevertheless, Tor will likely remain attractive for limited use infilesharing protocols that have separate control and data channels.[xxxx We should say more -- but what?  
That we'll see a similar  equilibriating effect as with bandwidth, where sensitive ops switch to  middleman, and we become less useful for filesharing, so the filesharing  people back off, so we get more ops since there's less filesharing, so the  filesharers come back, etc.]in practice, plausible deniability is hypothetical and doesn't seem veryconvincing. if ISPs find the activity antisocial, they don't care *why*your computer is doing that behavior.\subsection{Tor and blacklists}It was long expected that, alongside Tor's legitimate users, it would alsoattract troublemakers who exploited Tor in order to abuse services on theInternet.[XXX we're not talking bandwidth abuse here, we're talking vandalism,hate mails via hotmail, attacks, etc.]Our initial answer to this situation was to use ``exit policies''to allow individual Tor servers to block access to specific IP/port ranges.This approach was meant to make operators more willing to run Tor by allowingthem to prevent their servers from being used for abusing particularservices.  For example, all Tor servers currently block SMTP (port 25), inorder to avoid being used to send spam.This approach is useful, but is insufficient for two reasons.  First, sinceit is not possible to force all servers to block access to any given service,many of those services try to block Tor instead.  More broadly, while beingblockable is important to being good netizens, we would like to encourageservices to allow anonymous access; services should not need to decidebetween blocking legitimate anonymous use and allowing unlimited abuse.This is potentially a bigger problem than it may appear. On the one hand, if people want to refuse connections from you ontheir servers it would seem that they should be allowed to.  
But, a
possible major problem with the blocking of Tor is that it's not just
the decision of the individual server administrator who is deciding if
he wants to post to Wikipedia from his Tor node address or allow
people to read Wikipedia anonymously through his Tor node. (Wikipedia
has blocked all posting from all Tor nodes based on IP address.) If e.g.,
s/he comes through a campus or corporate NAT, then the decision must
be to have the entire population behind it able to have a Tor exit
node or to have write access to Wikipedia. This is a loss for both of us (Tor
and Wikipedia). We don't want to compete for (or divvy up) the NAT
protected entities of the world.

(A related problem is that many IP blacklists are not terribly fine-grained.
No current IP blacklist, for example, allows a service provider to blacklist
only those Tor servers that allow access to a specific IP or port, even
though this information is readily available.  One IP blacklist even bans
every class C network that contains a Tor server, and recommends banning SMTP
from these networks even though Tor does not allow SMTP at all.)
[****Since this is stupid and we oppose it, shouldn't we name names here -pfs]
[XXX also, they're making \emph{middleman nodes leave} because they're caught
up in the standoff!]

Problems of abuse occur mainly with services such as IRC networks and
Wikipedia, which rely on IP blocking to ban abusive users.  While at first
blush this practice might seem to depend on the anachronistic assumption that
each IP is an identifier for a single user, it is actually more reasonable in
practice: it assumes that non-proxy IPs are a costly resource, and that an
abuser cannot change IPs at will.  By blocking IPs which are used by Tor
servers, open proxies, and service abusers, these systems hope to make
ongoing abuse difficult.  Although the system is imperfect, it works
tolerably well for them in practice.

But of course, we would prefer that legitimate anonymous users be able to
access abuse-prone services.
One conceivable approach would be to requirewould-be IRC users, for instance, to register accounts if they wanted toaccess the IRC network from Tor.  But in practise, this would notsignificantly impede abuse if creating new accounts were easily automatable;[ XXX yahoo uses captchas in exactly this situation]this is why services use IP blocking.  In order to deter abuse, pseudonymousidentities need to require a significant switching cost in resources or humantime.%One approach, similar to that taken by Freedom, would be to bootstrap some%non-anonymous costly identification mechanism to allow access to a%blind-signature pseudonym protocol.  This would effectively create costly%pseudonyms, which services could require in order to allow anonymous access.%This approach has difficulties in practise, however:%\begin{tightlist}%\item Unlike Freedom, Tor is not a commercial service.  Therefore, it would%  be a shame to require payment in order to make Tor useful, or to make%  non-paying users second-class citizens.%\item It is hard to think of an underlying resource that would actually work.%  We could use IP addresses, but that's the problem, isn't it?%\item Managing single sign-on services is not considered a well-solved%  problem in practice.  If Microsoft can't get universal acceptance for%  Passport, why do we think that a Tor-specific solution would do any good?%\item Even if we came up with a perfect authentication system for our needs,%  there's no guarantee that any service would actually start using it.  It%  would require a nonzero effort for them to support it, and it might just%  be less hassle for them to block tor anyway.%\end{tightlist}The use of squishy IP-based ``authentication'' and ``authorization''has not broken down even to the level that SSNs used for thesepurposes have in commercial and public record contexts. 
Externalitiesand misplaced incentives cause a continued focus on fighting identitytheft by protecting SSNs rather than developing better authenticationand incentive schemes \cite{price-privacy}. Similarly we can expect acontinued use of identification by IP number as long as there is noworkable alternative.%Fortunately, our modular design separates%routing from node discovery; so we could implement Morphmix in Tor just%by implementing the Morphmix-specific node discovery and path selection%pieces.\section{Crossroads: Design choices}\label{sec:crossroads-design}\subsection{Transporting the stream vs transporting the packets}\label{subsec:stream-vs-packet}\label{subsec:tcp-vs-ip}We periodically run into ex ZKS employees who tell us that the process ofanonymizing IPs should ``obviously'' be done at the IP layer. Here arethe issues that need to be resolved before we'll be ready to switch Torover to arbitrary IP traffic.\begin{enumerate}\setlength{\itemsep}{0mm}\setlength{\parsep}{0mm}\item \emph{IP packets reveal OS characteristics.} We still need to doIP-level packet normalization, to stop things like IP fingerprintingattacks. There likely exist libraries that can help with this.\item \emph{Application-level streams still need scrubbing.} We still needTor to be easy to integrate with user-level application-specific proxiessuch as Privoxy. So it's not just a matter of capturing packets andanonymizing them at the IP layer.\item \emph{Certain protocols will still leak information.} For example,DNS requests destined for my local DNS servers need to be rewrittento be delivered to some other unlinkable DNS server. This requiresunderstanding the protocols we are transporting.\item \emph{The crypto is unspecified.} First we need a block-level encryptionapproach that can provide security despitepacket loss and out-of-order delivery. Freedom allegedly had one, but it wasnever publicly specified. 
%, and we believe it's likely vulnerable to tagging%attacks \cite{tor-design}.Also, TLS over UDP is not implemented or evenspecified, though some early work has begun on that~\cite{dtls}.\item \emph{We'll still need to tune network parameters}. Since the aboveencryption system will likely need sequence numbers (and maybe more) to doreplay detection, handle duplicate frames, etc, we will be reimplementingsome subset of TCP anyway.\item \emph{Exit policies for arbitrary IP packets mean building a secureIDS.}  Our server operators tell us that exit policies are one ofthe main reasons they're willing to run Tor.Adding an Intrusion Detection System to handle exit policies wouldincrease the security complexity of Tor, and would likely not work anyway,as evidenced by the entire field of IDS and counter-IDS papers. Manypotential abuse issues are resolved by the fact that Tor only transportsvalid TCP streams (as opposed to arbitrary IP including malformed packetsand IP floods), so exit policies become even \emph{more} important aswe become able to transport IP packets. We also need a way to compactlycharacterize the exit policies and let clients parse them to predictwhich nodes will allow which packets to exit.\item \emph{The Tor-internal name spaces would need to be redesigned.} Wesupport hidden service {\tt{.onion}} addresses, and other special addresseslike {\tt{.exit}} for the user to request a particular exit server,by intercepting the addresses when they are passed to the Tor client.\end{enumerate}This list is discouragingly long right now, but we recognize that itwould be good to investigate each of these items in further depth and tounderstand which are actual roadblocks and which are easier to resolvethan we think. 
We certainly wouldn't mind if Tor one day is able to
transport a greater variety of protocols.

\subsection{Mid-latency}
\label{subsec:mid-latency}

Though Tor has always been designed to be practical and usable first
with as much anonymity as can be built in subject to those goals, we
have contemplated that users might need resistance to at least simple
traffic correlation attacks.  Higher-latency mix-networks resist these
attacks by introducing variability into message arrival times in order to
suppress timing correlation.  Thus, it seems worthwhile to consider
whether we can improve Tor's anonymity by introducing batching and delaying
strategies to the Tor messages to prevent observers from linking incoming and
outgoing traffic.

Before we consider the engineering issues involved in the approach, of
course, we first need to study whether it can genuinely make users more
anonymous.  Research on end-to-end traffic analysis on higher-latency mix
networks~\cite{e2e-traffic} indicates that as timing variance decreases,
timing correlation attacks require increasingly less data; it might be the
case that Tor can't resist timing attacks for longer than a few minutes
without increasing message delays to an unusable degree.  Conversely, if Tor
can remain usable and slow timing attacks by even a matter of hours, this
would represent a significant improvement in practical anonymity: protecting
short-duration, once-off activities against a global observer is better than
protecting no activities at all.  In order to answer this question, we might
try to adapt the techniques of~\cite{e2e-traffic} to a lower-latency mix
network, where instead of sending uncorrelated messages, users send batches
of cells in temporally clustered connections.

Once the anonymity questions are answered, we need to consider usability.  If
the latency could be kept to two or three times its current overhead, this
might be acceptable to most Tor users.
However, it might also destroy much of
the user base, and it is difficult to know in advance.  Note also that in
practice, as the network grows to incorporate more DSL and cable-modem nodes,
and more nodes in various continents, this alone will \emph{already} cause
many-second delays for some transactions.  Reducing this latency will be
hard, so perhaps it's worth considering whether accepting this higher latency
can improve the anonymity we provide.  Also, it could be possible to
run a mid-latency option over the Tor network for those
users either willing to experiment or in need of more
anonymity.  This would allow us to experiment with both
the anonymity provided and the interest on the part of users.

Adding a mid-latency option should not require significant fundamental
change to the Tor client or server design; circuits could be labeled as
low- or mid-latency as they are constructed. Low-latency traffic
would be processed as now, while cells on circuits that are mid-latency
would be sent in uniform-size chunks at synchronized intervals.  (Traffic
already moves through the Tor network in fixed-sized cells; this would
increase the granularity.)  If servers forward these chunks in roughly
synchronous fashion, it will increase the similarity of data stream timing
signatures. By experimenting with the granularity of data chunks and
of synchronization we can attempt once again to optimize for both
usability and anonymity. Unlike in \cite{sync-batching}, it may be
impractical to synchronize on network batches by dropping chunks from
a batch that arrive late at a given node---unless Tor moves away from
stream processing to a more loss-tolerant paradigm
(cf.\ Section~\ref{subsec:tcp-vs-ip}). Instead, batch timing would be obscured by
synchronizing batches at the link level, and there would
be no direct attempt to synchronize all batches
entering the Tor network at the same time.
%Alternatively, if end-to-end traffic correlation is the
%concern, there is little point in mixing.
%   Why not??
-NMIt might also be feasible topad chunks to uniform size as is done now for cells; if this is linkpadding rather than end-to-end, then it will take less overhead,especially in bursty environments.% This is another way in which it%would be fairly practical to set up a mid-latency option within the%existing Tor network.Other padding regimens might supplement themid-latency option; however, we should continue the caution with whichwe have always approached padding lest the overhead cost us too muchperformance or too many volunteers.The distinction between traffic correlation and traffic analysis isnot as cut and dried as we might wish. In \cite{hintz-pet02} it wasshown that if data volumes of various popularresponder destinations are catalogued, it may not be necessary toobserve both ends of a stream to learn a source-destination link.This should be fairly effective without simultaneously observing bothends of the connection. However, it is still essentially confirmingsuspected communicants where the responder suspects are ``stored'' ratherthan observed at the same time as the client.Similarly latencies of going through various routes can becatalogued~\cite{back01} to connect endpoints.This is likely to entail high variability and massive storage since% XXX hintz-pet02 just looked at data volumes of the sites. this% doesn't require much variability or storage. I think it works% quite well actually. Also, \cite{kesdogan:pet2002} takes the% attack another level further, to narrow down where you could be% based on an intersection attack on subpages in a website. -RD%% I was trying to be terse and simultaneously referring to both the% Hintz stuff and the Back et al. stuff from Info Hiding 01. I've% separated the two and added the references. -PFSroutes through the network to each site will be random even if theyhave relatively unique latency characteristics. So this doesnot seem an immediate practical threat. 
Further along similar lines,the same paper suggested a ``clogging attack''. A version of thiswas demonstrated to be practical in\cite{attack-tor-oak05}. There it was shown that an outside attacker cantrace a stream through the Tor network while a stream is still activesimply by observing the latency of his own traffic sent throughvarious Tor nodes. These attacks are especially significant since theycounter previous results that running one's own onion router protectsbetter than using the network from the outside. The attacks do notshow the client address, only the first server within the Tor network,making helper nodes all the more worthy of exploration for enclaveprotection. Setting up a mid-latency subnet as described above wouldbe another significant step to evaluating resistance to such attacks.The attacks in \cite{attack-tor-oak05} are also dependent oncooperation of the responding application or the ability to modify ormonitor the responder stream, in order of decreasing attackeffectiveness.  So, another way to slow some of these attackswould be to cache responses at exit servers where possible, as it is withDNS lookups and cacheable HTTP responses.  Caching would, however,create threats of its own. First, a Tor network is expected to containhostile nodes. If one of these is the repository of a cache, theattack is still possible. Though more work to set up a Tor node andcache repository, the payoff of such an attack is potentiallyhigher.%To be%useful, such caches would need to be distributed to any likely exit%nodes of recurred requests for the same data.%   Even local caches could be useful, I think. -NM%%Added some clarification -PFSBesides allowing any other insider attacks, caching nodes would hold arecord of destinations and data visited by Tor users reducing forwardanonymity. 
Worse, for the cache to be widely useful much beyond the
client that caused it there would have to either be a new mechanism to
distribute cache information around the network and a way for clients
to make use of it or the caches themselves would need to be
distributed widely. Either way the record of visited sites and
downloaded information is made automatically available to an attacker
without having to actively gather it himself.  Besides its inherent
value, this could serve as useful data to an attacker deciding which
locations to target for confirmation. A way to counter this
distribution threat might be to only cache at certain semitrusted
helper nodes.  This might help specific clients, but it would limit
the general value of caching.
%Does that caching discussion belong in low-latency?

\subsection{Application support: SOCKS and beyond}

Tor supports the SOCKS protocol, which provides a standardized interface for
generic TCP proxies.  Unfortunately, this is not a complete solution for
many applications and platforms:
\begin{tightlist}
\item Many applications do not support SOCKS. To support such applications,
  it's necessary to replace the networking system calls with SOCKS-aware
  versions, or to run a local SOCKS tunnel and convince the applications to
  connect to localhost.  Neither of these tasks is easy for the average user,
  even with good instructions.
\item Even when applications do use SOCKS, they often make DNS requests
  themselves.  (The various versions of the SOCKS protocol include some where
  the application tells the proxy an IP address, and some where it sends a
  hostname.)  By connecting to the DNS server directly, the application breaks
  the user's anonymity and advertises where it is about to connect.
\end{tightlist}

So in order to actually provide good anonymity, we need to make sure that
users have a practical way to use Tor anonymously.
Possibilities include
writing wrappers for applications to anonymize them automatically; improving
the applications' support for SOCKS; writing libraries to help application
writers use Tor properly; and implementing a local DNS proxy to reroute DNS
requests to Tor so that applications can simply point their DNS resolvers at
localhost and continue to use SOCKS for data only.

\subsection{Measuring performance and capacity}

One of the paradoxes with engineering an anonymity network is that we'd like
to learn as much as we can about how traffic flows so we can improve the
network, but we want to prevent others from learning how traffic flows, so
that they cannot trace users' connections through the network.  Furthermore, many
mechanisms that help Tor run efficiently (such as having clients choose servers
based on their capacities) require measurements about the network.

Currently, servers record their bandwidth use in 15-minute intervals and
include this information in the descriptors they upload to the directory.
They also try to deduce their own available bandwidth, on the basis of how
much traffic they have been able to transfer recently, and upload this
information as well.

This is, of course, eminently cheatable.  A malicious server can get a
disproportionate amount of traffic simply by claiming to have more bandwidth
than it does.  But better mechanisms have their problems.  If bandwidth data
is to be measured rather than self-reported, it is usually possible for
servers to selectively provide better service for the measuring party, or
sabotage the measured value of other servers.  Complex solutions for
mix networks have been proposed, but do not address the issues
completely~\cite{mix-acc,casc-rep}.

Even without the possibility of cheating, network measurement is
non-trivial.  It is far from unusual for one observer's view of a server's
latency or bandwidth to disagree wildly with another's.
Furthermore, it isunclear whether total bandwidth is really the right measure; perhaps clientsshould be considering servers on the basis of unused bandwidth instead, orperhaps observed throughput.% XXXX say more here?%How to measure performance without letting people selectively deny service%by distinguishing pings. Heck, just how to measure performance at all. In%practice people have funny firewalls that don't match up to their exit%policies and Tor doesn't deal.%Network investigation: Is all this bandwidth publishing thing a good idea?%How can we collect stats better? Note weasel's smokeping, at%http://seppia.noreply.org/cgi-bin/smokeping.cgi?target=Tor%which probably gives george and steven enough info to break tor?Even if we can collect and use this network information effectively, we needto make sure that it is not more useful to attackers than to us.  While itseems plausible that bandwidth data alone is not enough to revealsender-recipient connections under most circumstances, it could certainlyreveal the path taken by large traffic flows under low-usage circumstances.\subsection{Running a Tor server, path length, and helper nodes}It has been thought for some time that the best anonymity protectioncomes from running your own onion router~\cite{or-pet00,tor-design}.(In fact, in Onion Routing's first design, this was the only optionpossible~\cite{or-ih96}.) The first design also had a fixed pathlength of five nodes. Middle Onion Routing involved much analysis(mostly unpublished) of route selection algorithms and path lengthalgorithms to combine efficiency with unpredictability in routes.Since, unlike Crowds, nodes in a route cannot all know the ultimatedestination of an application connection, it was generally notconsidered significant if a node could determine via latency that itwas second in the route. 
But if one followed Tor's three node defaultpath length, an enclave-to-enclave communication (in which two of theORs were at each enclave) would be completely compromised by themiddle node. Thus for enclave-to-enclave communication, four is the fewestnumber of nodes that preserves the $\frac{c^2}{n^2}$ degree of protectionin any setting.The Murdoch-Danezis attack, however, shows that simply adding to thepath length may not protect usage of an enclave protecting OR\@.  Ahostile web server can determine all of the nodes in a three node Torpath. The attack only identifies that a node is on the route, notwhere. For example, if all of the nodes on the route were enclavenodes, the attack would not identify which of the two not directlyvisible to the attacker was the source.  Thus, there remains anelement of plausible deniability that is preserved for enclave nodes.However, Tor has always sought to be stronger than plausibledeniability. Our assumption is that users of the network are concernedabout being identified by an adversary, not with being proven guiltybeyond any reasonable doubt. Still it is something, and may be desiredin some settings.It is reasonable to think that this attack can be easily extended tolonger paths should those be used; nonetheless there may be someadvantage to random path length. If the number of nodes is unknown,then the adversary would need to send streams to all the nodes in thenetwork and analyze the resulting latency from them to be reasonablycertain that it has not missed the first node in the circuit. Also,the attack does not identify the order of nodes in a route, so thelonger the route, the greater the uncertainty about which node mightbe first. 
It may be possible to extend the attack to learn the route
node order, but it has not been shown whether this is practically feasible.
If so, the incompleteness uncertainty engendered by random lengths would
remain, but once the complete set of nodes in the route were identified
the initiating node would also be identified.

Another way to reduce the threats to both enclaves and simple Tor
clients is to have helper nodes. Helper nodes were introduced
in~\cite{wright03} as a suggested means of protecting the identity
of the initiator of a communication in various anonymity protocols.
The idea is to use a single trusted node as the first one you go to;
that way an attacker cannot ever attack the first nodes you connect
to and do some form of intersection attack. This will not affect the
Murdoch-Danezis attack at all if the attacker can time latencies to
both the helper node and the enclave node.

We have to pick the path length so that the adversary can't distinguish client from
server (how many hops is good?).

\subsection{Helper nodes}
\label{subsec:helper-nodes}

Tor can only provide anonymity against an attacker if that attacker can't
monitor the user's entry and exit on the Tor network.  But since Tor
currently chooses entry and exit points randomly and changes them frequently,
a patient attacker who controls a single entry and a single exit is sure to
eventually break some circuits of frequent users who consider those servers.
(We assume that users are as concerned about statistical profiling as about
the anonymity of any particular connection.  That is, it is almost as bad to
leak the fact that Alice {\it sometimes} talks to Bob as it is to leak the times
when Alice is {\it actually} talking to Bob.)

One solution to this problem is to use ``helper nodes''~\cite{wright02,wright03}---to
have each client choose a few fixed servers for critical positions in her
circuits.
That is, Alice might choose some server H1 as her preferred
entry, so that unless the attacker happens to control or observe her
connection to H1, her circuits will remain anonymous.  If H1 is compromised,
Alice is vulnerable as before.  But now, at least, she has a chance of
not being profiled.

(Choosing fixed exit nodes is less useful, since the connection from the exit
node to Alice's destination will be seen not only by the exit but by the
destination.  Even if Alice chooses a good fixed exit node, she may
nevertheless connect to a hostile website.)

There are still obstacles remaining before helper nodes can be implemented.
For one, the literature does not describe how to choose helpers from a list
of servers that changes over time.  If Alice is forced to choose a new entry
helper every $d$ days, and $c$ of the $n$ servers are compromised, she can
expect to choose a compromised server around
every $dn/c$ days.  Worse, an attacker with the ability to DoS servers could
force their users to switch helper nodes more frequently.

%Do general DoS attacks have anonymity implications? See e.g. Adam
%Back's IH paper, but I think there's more to be pointed out here. -RD
% Not sure what you want to say here. -NM

%Game theory for helper nodes: if Alice offers a hidden service on a
%server (enclave model), and nobody ever uses helper nodes, then against
%George+Steven's attack she's totally nailed. If only Alice uses a helper
%node, then she's still identified as the source of the data. If everybody
%uses a helper node (including Alice), then the attack identifies the
%helper node and also Alice, and knows which one is which. If everybody
%uses a helper node (but not Alice), then the attacker figures the real
%source was a client that is using Alice as a helper node. [How's my
%logic here?] -RD
%
% Not sure about the logic.  For the attack to work with helper nodes, the
%attacker needs to guess that Alice is running the hidden service, right?
%Otherwise, how can he know to measure her traffic specifically?
% -NM

%point to routing-zones section re: helper nodes to defend against
%big stuff.

\subsection{Location-hidden services}
\label{subsec:hidden-services}

While most of the discussions above have been about forward anonymity
with Tor, it also provides support for \emph{rendezvous points}, which
let users provide TCP services to other Tor users without revealing
their location. Since this feature is relatively recent, we describe here
a couple of our early observations from its deployment.

First, our implementation of hidden services seems less hidden than we'd
like, since they are configured on a single client and get used over
and over---particularly because an external adversary can induce them to
produce traffic. They seem the ideal use case for our above discussion
of helper nodes. This insecurity means that they may not be suitable as
a building block for Free Haven~\cite{freehaven-berk} or other anonymous
publishing systems that aim to provide long-term security.
%Also, they're brittle in terms of intersection and observation attacks.

\emph{Hot-swap} hidden services, where more than one location can
provide the service and loss of any one location does not imply a
change in service, would help foil intersection and observation attacks
where an adversary monitors availability of a hidden service and also
monitors whether certain users or servers are online. However, the design
challenges in providing these services without otherwise compromising
the hidden service's anonymity remain an open problem.

In practice, hidden services are used for more than just providing private
access to a web server or IRC server. People are using hidden services
as a poor man's VPN and firewall-buster.
Many people want to be able
to connect to the computers in their private network via secure shell,
and rather than playing with dyndns and trying to pierce holes in their
firewall, they run a hidden service on the inside and then rendezvous
with that hidden service externally.
Also, sites like Bloggers Without Borders (www.b19s.org) are advertising
a hidden-service address on their front page. Doing this can provide
increased robustness if they use the dual-IP approach we describe in
tor-design, but in practice they do it firstly to increase visibility
of the Tor project and their support for privacy, and secondly to offer
a way for their users, using unmodified software, to get end-to-end
encryption and end-to-end authentication to their website.

\subsection{Trust and discovery}

[arma will edit this and expand/retract it]
The published Tor design adopted a deliberately simplistic design for
authorizing new nodes and informing clients about servers and their status.
In the early Tor designs, all ORs periodically uploaded a signed description
of their locations, keys, and capabilities to each of several well-known {\it
  directory servers}.  These directory servers constructed a signed summary
of all known ORs (a ``directory''), and a signed statement of which ORs they
believed to be operational at any given time (a ``network status'').  Clients
periodically downloaded a directory in order to learn the latest ORs and
keys, and more frequently downloaded a network status to learn which ORs are
likely to be running.  ORs also operate as directory caches, in order to
lighten the bandwidth on the authoritative directory servers.
In order to prevent Sybil attacks (wherein an adversary signs up many
purportedly independent servers in order to increase her chances of observing
a stream as it enters and leaves the network), the early Tor directory design
required the operators of the authoritative directory servers to manually
approve new ORs.
Unapproved ORs were included in the directory, but clients
did not use them at the start or end of their circuits.  In practice,
directory administrators performed little actual verification, and tended to
approve any OR whose operator could compose a coherent email.  This procedure
may have prevented trivial automated Sybil attacks, but would do little
against a clever attacker.
There are a number of flaws in this system that need to be addressed as we
move forward.  They include:
\begin{tightlist}
\item Each directory server represents an independent point of failure; if
  any one were compromised, it could immediately compromise all of its users
  by recommending only compromised ORs.
\item The more servers join the network, the more unreasonable it
  becomes to expect clients to know about them all.  Directories
  become unfeasibly large, and downloading the list of servers becomes
  burdensome.
\item The validation scheme may do as much harm as it does good.  It is not
  only incapable of preventing clever attackers from mounting Sybil attacks,
  but may deter server operators from joining the network.  (For instance, if
  they expect the validation process to be difficult, or if they do not share
  any languages in common with the directory server operators.)
\end{tightlist}
We could try to move the system in several directions, depending on our
choice of threat model and requirements.  If we did not need to increase
network capacity in order to support more users, there would be no reason not
to adopt even stricter validation requirements, and reduce the number of
servers in the network to a trusted minimum.  But since we want Tor to work
for as many users as it can, we need XXXXX
In order to address the first two issues, it seems wise to move to a system
including a number of semi-trusted directory servers, no one of which can
compromise a user on its own.
Ultimately, of course, we cannot escape the
problem of a first introducer: since most users will run Tor in whatever
configuration the software ships with, the Tor distribution itself will
remain a potential single point of failure so long as it includes the seed
keys for directory servers, a list of directory servers, or any other means
to learn which servers are on the network.  But omitting this information
from the Tor distribution would only delegate the trust problem to the
individual users, most of whom are presumably less informed about how to make
trust decisions than the Tor developers.
%Network discovery, sybil, node admission, scaling. It seems that the code
%will ship with something and that's our trust root. We could try to get
%people to build a web of trust, but no. Where we go from here depends
%on what threats we have in mind. Really decentralized if your threat is
%RIAA; less so if threat is to application data or individuals or...

\section{Crossroads: Scaling}
%\label{sec:crossroads-scaling}
%P2P + anonymity issues:

Tor is running today with hundreds of servers and tens of thousands of
users, but it will certainly not scale to millions.
Scaling Tor involves three main challenges.  First is safe server
discovery, both bootstrapping -- how a Tor client can robustly find an
initial server list -- and ongoing -- how a Tor client can learn about
a fair sample of honest servers and not let the adversary control his
circuits (see Section~\ref{}).  Second is detecting and handling the speed
and reliability of the variety of servers we must use if we want to
accept many servers (see Section~\ref{}).
Since the speed and reliability of a circuit are limited by its worst link,
we must learn to track and predict performance.
Finally, in order to get
a large set of servers in the first place, we must address incentives
for users to carry traffic for others (see Section incentives).

\subsection{Incentives by Design}

[nick will try to make this section shorter and more to the point.]
[most of the technical incentive schemes in the literature introduce
anonymity issues which we don't understand yet, and we seem to be doing
ok without them]
There are three behaviors we need to encourage for each server: relaying
traffic; providing good throughput and reliability while doing it;
and allowing traffic to exit the network from that server.
We encourage these behaviors through \emph{indirect} incentives, that
is, designing the system and educating users in such a way that users
with certain goals will choose to relay traffic.  In practice, the
main incentive for running a Tor server is social benefit: volunteers
altruistically donate their bandwidth and time.  We also keep public
rankings of the throughput and reliability of servers, much like
seti@home.
We further explain to users that they can get \emph{better
security} by operating a server, because they get plausible deniability
(indeed, they may not need to route their own traffic through Tor at all
-- blending directly with other traffic exiting Tor may be sufficient
protection for them), and because they can use their own Tor server
as entry or exit point and be confident it's not run by the adversary.
Finally, we can improve the usability and feature set of the software:
rate limiting support and easy packaging decrease the hassle of
maintaining a server, and our configurable exit policies allow each
operator to advertise a policy describing the hosts and ports to which
he feels comfortable connecting.
Beyond these, however, there is also a need for \emph{direct} incentives:
providing payment or other resources in return for high-quality service.
Paying actual money is problematic: decentralized e-cash systems are
not yet practical, and a centralized collection system not only reduces
robustness, but also has failed in the past (the history of commercial
anonymizing networks is littered with failed attempts).  A more promising
option is to use a tit-for-tat incentive scheme: provide better service
to nodes that have provided good service to you.
Unfortunately, such an approach introduces new anonymity problems.
Does the incentive system enable the adversary to attract more traffic by
performing well? Typically a user who chooses evenly from all options is
most resistant to an adversary targeting him, but that approach prevents
us from handling heterogeneous servers \cite{casc-rep}.
When a server (call him Steve) performs well for Alice, does Steve gain
reputation with the entire system, or just with Alice? If the entire
system, how does Alice tell everybody about her experience in a way that
prevents her from lying about it yet still protects her identity?
If Steve's behavior only affects Alice's behavior, does this allow Steve to
selectively perform only for Alice, and then break her anonymity later
when somebody (presumably Alice) routes through his node?
These are difficult and open questions, yet choosing not to scale means
leaving most users to a less secure network or no anonymizing network
at all.  We will start with a simplified approach to the tit-for-tat
incentive scheme based on two rules: (1) each node should measure the
service it receives from adjacent nodes, and provide service relative to
the received service, but (2) when a node is making decisions that affect
its own security (e.g. when building a circuit for its own application
connections), it should choose evenly from a sufficiently large set of
nodes that meet some minimum service threshold.  This approach allows us
to discourage bad service without opening Alice up as much to attacks.
%XXX rewrite the above so it sounds less like a grant proposal and
%more like a "if somebody were to try to solve this, maybe this is a
%good first step".
%We should implement the above incentive scheme in the
%deployed Tor network, in conjunction with our plans to add the necessary
%associated scalability mechanisms.  We will do experiments (simulated
%and/or real) to determine how much the incentive system improves
%efficiency over baseline, and also to determine how far we are from
%optimal efficiency (what we could get if we ignored the anonymity goals).

\subsection{Peer-to-peer / practical issues}

[leave this section for now, and make sure things here are covered
elsewhere. then remove it.]
Making use of servers with little bandwidth. How to handle hammering by
certain applications.
Handling servers that are far away from the rest of the network, e.g. on
the continents that aren't North America and Europe. High latency,
often high packet loss.
Running Tor servers behind NATs, behind great-firewalls-of-China, etc.
Restricted routes. How to propagate to everybody the topology?
BGP style doesn't work because we don't want just *one* path. Point to
Geoff's stuff.

\subsection{Location diversity and ISP-class adversaries}
\label{subsec:routing-zones}

Anonymity networks have long relied on diversity of node location for
protection against attacks---typically an adversary who can observe a
larger fraction of the network can launch a more effective attack. One
way to achieve dispersal involves growing the network so a given adversary
sees less. Alternately, we can arrange the topology so traffic can enter
or exit at many places (for example, by using a free-route network
like Tor rather than a cascade network like JAP). Lastly, we can use
distributed trust to spread each transaction over multiple jurisdictions.
But how do we decide whether two nodes are in related locations?
Feamster and Dingledine defined a \emph{location diversity} metric
in \cite{feamster:wpes2004}, and began investigating a variant of location
diversity based on the fact that the Internet is divided into thousands of
independently operated networks called {\em autonomous systems} (ASes).
The key insight from their paper is that while we typically think of a
connection as going directly from the Tor client to her first Tor node,
actually it traverses many different ASes on each hop. An adversary at
any of these ASes can monitor or influence traffic. Specifically, given
plausible initiators and recipients and random path selection,
some ASes in the simulation were able to observe 10\% to 30\% of the
transactions (that is, learn both the origin and the destination) on
the deployed Tor network (33 nodes as of June 2004).
The paper concludes that for best protection against the AS-level
adversary, nodes should be in ASes that have the most links to other ASes:
Tier-1 ISPs such as AT\&T and Abovenet. Further, a given transaction
is safest when it starts or ends in a Tier-1 ISP.
Therefore, assuming
initiator and responder are both in the U.S., it actually \emph{hurts}
our location diversity to add far-flung nodes in continents like Asia
or South America.
Many open questions remain. First, it will be an immense engineering
challenge to get an entire BGP routing table to each Tor client, or at
least summarize it sufficiently. Without a local copy, clients won't be
able to safely predict what ASes will be traversed on the various paths
through the Tor network to the final destination. Tarzan~\cite{tarzan:ccs02}
and MorphMix~\cite{morphmix:fc04} suggest that we compare IP prefixes to
determine location diversity; but the above paper showed that in practice
many of the Mixmaster nodes that share a single AS have entirely different
IP prefixes. When the network has scaled to thousands of nodes, does IP
prefix comparison become a more useful approximation?
%
Second, can we take advantage of caching certain content at the exit nodes, to
limit the number of requests that need to leave the network at all?
What about taking advantage of caches like Akamai's or Google's? What
about treating them as adversaries?
%
Third, if we follow the paper's recommendations and tailor path selection
to avoid choosing endpoints in similar locations, how much are we hurting
anonymity against larger real-world adversaries who can take advantage
of knowing our algorithm?
%
Lastly, can we use this knowledge to figure out which gaps in our network
would most improve our robustness to this class of attack, and go recruit
new servers with those ASes in mind?
Tor's security relies in large part on the dispersal properties of its
network. We need to be more aware of the anonymity properties of various
approaches so we can make better design decisions in the future.

\subsection{The China problem}
\label{subsec:china}

Citizens in a variety of countries, such as most recently China and
Iran, are periodically blocked from accessing various sites outside
their country.
These users try to find any tools available to allow
them to get around these firewalls. Some anonymity networks, such as
Six-Four~\cite{six-four}, are designed specifically with this goal in
mind; others like the Anonymizer~\cite{anonymizer} are paid by sponsors
such as Voice of America to set up a network to encourage Internet
freedom. Even though Tor wasn't
designed with ubiquitous access to the network in mind, thousands of
users across the world are trying to use it for exactly this purpose.
% Academic and NGO organizations, peacefire, \cite{berkman}, etc
Anti-censorship networks hoping to bridge country-level blocks face
a variety of challenges. One of these is that they need to find enough
exit nodes---servers on the `free' side that are willing to relay
arbitrary traffic from users to their final destinations. Anonymizing
networks including Tor are well-suited to this task, since we have
already gathered a set of exit nodes that are willing to tolerate some
political heat.
The other main challenge is to distribute a list of reachable relays
to the users inside the country, and give them software to use them,
without letting the authorities also enumerate this list and block each
relay. Anonymizer solves this by buying lots of seemingly-unrelated IP
addresses (or having them donated), abandoning old addresses as they are
`used up', and telling a few users about the new ones. Distributed
anonymizing networks again have an advantage here, in that we already
have tens of thousands of separate IP addresses whose users might
volunteer to provide this service since they've already installed and use
the software for their own privacy~\cite{koepsell:wpes2004}.
Because the Tor protocol separates routing from network discovery (see
Section~\ref{do-we-discuss-this?}), volunteers could configure their Tor clients
to generate server descriptors and send them to a special directory
server that gives them out to dissidents who need to get around blocks.
Of course, this still doesn't prevent the adversary
from enumerating all the volunteer relays and blocking them preemptively.
Perhaps a tiered-trust system could be built where a few individuals are
given relays' locations, and they recommend other individuals by telling them
those addresses, thus providing a built-in incentive to avoid letting the
adversary intercept them. Max-flow trust algorithms~\cite{advogato}
might help to bound the number of IP addresses leaked to the adversary. Groups
like the W3C are looking into using Tor as a component in an overall system to
help address censorship; we wish them luck.
%\cite{infranet}

\subsection{Non-clique topologies}

Tor's comparatively weak threat model makes it easier to scale than other mix net
designs.  High-latency mix networks need to avoid partitioning attacks, where
network splits prevent users of the separate partitions from providing cover
for each other.  In Tor, however, we assume that the adversary cannot
cheaply observe nodes at will, so even if the network becomes split, the
users do not necessarily receive much less protection.
Thus, a simple possibility when the scale of a Tor network
exceeds some size is to simply split it. Care could be taken in
allocating which nodes go to which network along the lines of
\cite{casc-rep} to ensure that collaborating hostile nodes are not
able to gain any advantage in network splitting that they do not
already have in joining a network.
% Describe these attacks; many people will not have read the paper!
The attacks in \cite{attack-tor-oak05} show that certain types of
brute force attacks are in fact feasible; however they make the
above point stronger not weaker.
The attacks do not appear to be
significantly more difficult to mount against a network that is
twice the size. Also, they only identify the Tor nodes used in a
circuit, not the client. Finally, note that even if the network is split,
a client does not need to use just one of the two resulting networks.
Alice could use either of them, and it would not be difficult to make
the Tor client able to access several such networks on a per-circuit
basis. More analysis is needed; we simply note here that splitting
a Tor network is an easy way to achieve moderate scalability and that
it does not necessarily have the same implications as splitting a mixnet.
Alternatively, we can try to scale a single Tor network.  Some issues for
scaling include restricting the number of sockets and the amount of bandwidth
used by each server.  The number of sockets is determined by the network's
connectivity and the number of users, while bandwidth capacity is determined
by the total bandwidth of servers on the network.  The simplest solution to
bandwidth capacity is to add more servers, since adding a Tor node of any
feasible bandwidth will increase the traffic capacity of the network.  So as
a first step to scaling, we should focus on making the network tolerate more
servers, by reducing the interconnectivity of the nodes; later we can reduce
overhead associated with directories, discovery, and so on.
By reducing the connectivity of the network we increase the total number of
nodes that the network can contain. Danezis~\cite{danezis-pets03} considers
the anonymity implications of restricting routes on mix networks, and
recommends an approach based on expander graphs (where any subgraph is likely
to have many neighbors).  It is not immediately clear that this approach will
extend to Tor, which has a weaker threat model but higher performance
requirements than the network considered.
Instead of analyzing the
probability of an attacker's viewing whole paths, we will need to examine the
attacker's likelihood of compromising the endpoints of a Tor circuit through
a sparse network.
% Nick edits these next 2 grafs.
To make matters simpler, Tor may not need an expander graph per se: it
may be enough to have a single subnet that is highly connected.  As an
example, assume fifty nodes of relatively high traffic capacity.  This
\emph{center} forms a clique.  Assume each center node can
handle 200 connections to other nodes (including the other ones in the
center). Assume every noncenter node connects to three nodes in the
center and anyone out of the center that they want to.  Then the
network easily scales to c. 2500 nodes with commensurate increase in
bandwidth. There are many open questions: how directory information
is distributed (presumably information about the center nodes could
be given to any new nodes with their codebase), whether center nodes
will need to function as a `backbone', etc. As above, the point is
that this would create problems for the expected anonymity for a mixnet,
but for an onion routing network where anonymity derives largely from
the edges, it may be feasible.
Another point is that we already have a non-clique topology.
Individuals can set up and run Tor nodes without informing the
directory servers. This will allow, e.g., dissident groups to run a
local Tor network of such nodes that connects to the public Tor
network. This network is hidden behind the Tor network and its
only visible connection to Tor is at those points where it connects.
As far as the public network is concerned or anyone observing it,
they are running clients.

\section{The Future}
\label{sec:conclusion}

we should put random thoughts here until there are enough for a
conclusion.
will our sustainability approach work?
we'll see.
Applications that leak data: we can say they're not our problem, but
they're somebody's problem.
The more widely deployed Tor becomes, the more people who need a
deployed overlay network tell us they'd like to use us if only we added
the following more features.
"These are difficult and open questions, yet choosing not to solve them
means leaving most users to a less secure network or no anonymizing
network at all."

\bibliographystyle{plain} \bibliography{tor-design}
\clearpage
\appendix
\begin{figure}[t]
%\unitlength=1in
\centering
%\begin{picture}(6.0,2.0)
%\put(3,1){\makebox(0,0)[c]{\epsfig{figure=graphnodes,width=6in}}}
%\end{picture}
\mbox{\epsfig{figure=graphnodes,width=5in}}
\caption{Number of servers over time. Lowest line is number of exit
nodes that allow connections to port 80. Middle line is total number of
verified (registered) servers. The line above that represents servers
that are not yet registered.}
\label{fig:graphnodes}
\end{figure}
\begin{figure}[t]
\centering
\mbox{\epsfig{figure=graphtraffic,width=5in}}
\caption{The sum of traffic reported by each server over time. The bottom
pair show average throughput, and the top pair represent the largest 15
minute burst in each 4 hour period.}
\label{fig:graphtraffic}
\end{figure}
\end{document}
 |