/* cpuworker.c (14 KB) */
  1. /* Copyright 2003-2004 Roger Dingledine.
  2. * Copyright 2004-2005 Roger Dingledine, Nick Mathewson. */
  3. /* See LICENSE for licensing information */
  4. /* $Id$ */
  5. const char cpuworker_c_id[] = "$Id$";
  6. /**
  7. * \file cpuworker.c
  8. * \brief Implements a farm of 'CPU worker' processes to perform
  9. * CPU-intensive tasks in another thread or process, to not
  10. * interrupt the main thread.
  11. *
  12. * Right now, we only use this for processing onionskins.
  13. **/
  14. #include "or.h"
  15. /** The maximum number of cpuworker processes we will keep around. */
  16. #define MAX_CPUWORKERS 16
  17. /** The minimum number of cpuworker processes we will keep around. */
  18. #define MIN_CPUWORKERS 1
  19. /** The tag specifies which circuit this onionskin was from. */
  20. #define TAG_LEN 8
  21. /** How many bytes are sent from tor to the cpuworker? */
  22. #define LEN_ONION_QUESTION (1+TAG_LEN+ONIONSKIN_CHALLENGE_LEN)
  23. /** How many bytes are sent from the cpuworker back to tor? */
  24. #define LEN_ONION_RESPONSE (1+TAG_LEN+ONIONSKIN_REPLY_LEN+CPATH_KEY_MATERIAL_LEN)
  25. /** How many cpuworkers we have running right now. */
  26. static int num_cpuworkers=0;
  27. /** How many of the running cpuworkers have an assigned task right now. */
  28. static int num_cpuworkers_busy=0;
  29. /** We need to spawn new cpuworkers whenever we rotate the onion keys
  30. * on platforms where execution contexts==processes. This variable stores
  31. * the last time we got a key rotation event. */
  32. static time_t last_rotation_time=0;
  33. static int cpuworker_main(void *data);
  34. static int spawn_cpuworker(void);
  35. static void spawn_enough_cpuworkers(void);
  36. static void process_pending_task(connection_t *cpuworker);
  37. /** Initialize the cpuworker subsystem.
  38. */
  39. void
  40. cpu_init(void)
  41. {
  42. last_rotation_time=time(NULL);
  43. spawn_enough_cpuworkers();
  44. }
  45. /** Called when we're done sending a request to a cpuworker. */
  46. int
  47. connection_cpu_finished_flushing(connection_t *conn)
  48. {
  49. tor_assert(conn);
  50. tor_assert(conn->type == CONN_TYPE_CPUWORKER);
  51. connection_stop_writing(conn);
  52. return 0;
  53. }
  54. /** Pack addr,port,and circ_id; set *tag to the result. (See note on
  55. * cpuworker_main for wire format.) */
  56. static void
  57. tag_pack(char *tag, uint32_t addr, uint16_t port, uint16_t circ_id)
  58. {
  59. *(uint32_t *)tag = addr;
  60. *(uint16_t *)(tag+4) = port;
  61. *(uint16_t *)(tag+6) = circ_id;
  62. }
  63. /** Unpack <b>tag</b> into addr, port, and circ_id.
  64. */
  65. static void
  66. tag_unpack(const char *tag, uint32_t *addr, uint16_t *port, uint16_t *circ_id)
  67. {
  68. struct in_addr in;
  69. char addrbuf[INET_NTOA_BUF_LEN];
  70. *addr = *(const uint32_t *)tag;
  71. *port = *(const uint16_t *)(tag+4);
  72. *circ_id = *(const uint16_t *)(tag+6);
  73. in.s_addr = htonl(*addr);
  74. tor_inet_ntoa(&in, addrbuf, sizeof(addrbuf));
  75. log_fn(LOG_DEBUG,"onion was from %s:%d, circ_id %d.", addrbuf, *port, *circ_id);
  76. }
  77. /** Called when the onion key has changed and we need to spawn new
  78. * cpuworkers. Close all currently idle cpuworkers, and mark the last
  79. * rotation time as now.
  80. */
  81. void
  82. cpuworkers_rotate(void)
  83. {
  84. connection_t *cpuworker;
  85. while ((cpuworker = connection_get_by_type_state(CONN_TYPE_CPUWORKER,
  86. CPUWORKER_STATE_IDLE))) {
  87. connection_mark_for_close(cpuworker);
  88. --num_cpuworkers;
  89. }
  90. last_rotation_time = time(NULL);
  91. spawn_enough_cpuworkers();
  92. }
  93. /** If the cpuworker closes the connection,
  94. * mark it as closed and spawn a new one as needed. */
  95. int
  96. connection_cpu_reached_eof(connection_t *conn)
  97. {
  98. log_fn(LOG_WARN,"Read eof. Worker died unexpectedly.");
  99. if (conn->state != CPUWORKER_STATE_IDLE) {
  100. /* the circ associated with this cpuworker will have to wait until
  101. * it gets culled in run_connection_housekeeping(), since we have
  102. * no way to find out which circ it was. */
  103. log_fn(LOG_WARN,"...and it left a circuit queued; abandoning circ.");
  104. num_cpuworkers_busy--;
  105. }
  106. num_cpuworkers--;
  107. spawn_enough_cpuworkers(); /* try to regrow. hope we don't end up spinning. */
  108. connection_mark_for_close(conn);
  109. return 0;
  110. }
  111. /** Called when we get data from a cpuworker. If the answer is not complete,
  112. * wait for a complete answer. If the answer is complete,
  113. * process it as appropriate.
  114. */
  115. int
  116. connection_cpu_process_inbuf(connection_t *conn)
  117. {
  118. char success;
  119. char buf[LEN_ONION_RESPONSE];
  120. uint32_t addr;
  121. uint16_t port;
  122. uint16_t circ_id;
  123. connection_t *p_conn;
  124. circuit_t *circ;
  125. tor_assert(conn);
  126. tor_assert(conn->type == CONN_TYPE_CPUWORKER);
  127. if (!buf_datalen(conn->inbuf))
  128. return 0;
  129. if (conn->state == CPUWORKER_STATE_BUSY_ONION) {
  130. if (buf_datalen(conn->inbuf) < LEN_ONION_RESPONSE) /* entire answer available? */
  131. return 0; /* not yet */
  132. tor_assert(buf_datalen(conn->inbuf) == LEN_ONION_RESPONSE);
  133. connection_fetch_from_buf(&success,1,conn);
  134. connection_fetch_from_buf(buf,LEN_ONION_RESPONSE-1,conn);
  135. /* parse out the circ it was talking about */
  136. tag_unpack(buf, &addr, &port, &circ_id);
  137. circ = NULL;
  138. /* (Here we use connection_or_exact_get_by_addr_port rather than
  139. * get_by_identity_digest: we want a specific port here in
  140. * case there are multiple connections.) */
  141. p_conn = connection_or_exact_get_by_addr_port(addr,port);
  142. if (p_conn)
  143. circ = circuit_get_by_circid_orconn(circ_id, p_conn);
  144. if (success == 0) {
  145. log_fn(LOG_INFO,"decoding onionskin failed. Closing.");
  146. if (circ)
  147. circuit_mark_for_close(circ);
  148. goto done_processing;
  149. }
  150. if (!circ) {
  151. log_fn(LOG_INFO,"processed onion for a circ that's gone. Dropping.");
  152. goto done_processing;
  153. }
  154. tor_assert(circ->p_conn);
  155. if (onionskin_answer(circ, CELL_CREATED, buf+TAG_LEN, buf+TAG_LEN+ONIONSKIN_REPLY_LEN) < 0) {
  156. log_fn(LOG_WARN,"onionskin_answer failed. Closing.");
  157. circuit_mark_for_close(circ);
  158. goto done_processing;
  159. }
  160. log_fn(LOG_DEBUG,"onionskin_answer succeeded. Yay.");
  161. } else {
  162. tor_assert(0); /* don't ask me to do handshakes yet */
  163. }
  164. done_processing:
  165. conn->state = CPUWORKER_STATE_IDLE;
  166. num_cpuworkers_busy--;
  167. if (conn->timestamp_created < last_rotation_time) {
  168. connection_mark_for_close(conn);
  169. num_cpuworkers--;
  170. spawn_enough_cpuworkers();
  171. } else {
  172. process_pending_task(conn);
  173. }
  174. return 0;
  175. }
  176. /** Implement a cpuworker. 'data' is an fdarray as returned by socketpair.
  177. * Read and writes from fdarray[1]. Reads requests, writes answers.
  178. *
  179. * Request format:
  180. * Task type [1 byte, always CPUWORKER_TASK_ONION]
  181. * Opaque tag TAG_LEN
  182. * Onionskin challenge ONIONSKIN_CHALLENGE_LEN
  183. * Response format:
  184. * Success/failure [1 byte, boolean.]
  185. * Opaque tag TAG_LEN
  186. * Onionskin challenge ONIONSKIN_REPLY_LEN
  187. * Negotiated keys KEY_LEN*2+DIGEST_LEN*2
  188. *
  189. * (Note: this _should_ be by addr/port, since we're concerned with specific
  190. * connections, not with routers (where we'd use identity).)
  191. */
  192. static int
  193. cpuworker_main(void *data)
  194. {
  195. char question[ONIONSKIN_CHALLENGE_LEN];
  196. uint8_t question_type;
  197. int *fdarray = data;
  198. int fd;
  199. /* variables for onion processing */
  200. char keys[CPATH_KEY_MATERIAL_LEN];
  201. char reply_to_proxy[ONIONSKIN_REPLY_LEN];
  202. char buf[LEN_ONION_RESPONSE];
  203. char tag[TAG_LEN];
  204. crypto_pk_env_t *onion_key = NULL, *last_onion_key = NULL;
  205. fd = fdarray[1]; /* this side is ours */
  206. #ifndef TOR_IS_MULTITHREADED
  207. tor_close_socket(fdarray[0]); /* this is the side of the socketpair the parent uses */
  208. tor_free_all(1); /* so the child doesn't hold the parent's fd's open */
  209. handle_signals(0); /* ignore interrupts from the keyboard, etc */
  210. #endif
  211. tor_free(data);
  212. dup_onion_keys(&onion_key, &last_onion_key);
  213. for (;;) {
  214. int r;
  215. if ((r = recv(fd, &question_type, 1, 0)) != 1) {
  216. // log_fn(LOG_ERR,"read type failed. Exiting.");
  217. if (r == 0) {
  218. log_fn(LOG_INFO,"CPU worker exiting because Tor process closed connection (either rotated keys or died).");
  219. } else {
  220. log_fn(LOG_INFO,"CPU worker editing because of error on connection to Tor process.");
  221. log_fn(LOG_INFO,"(Error on %d was %s)", fd, tor_socket_strerror(tor_socket_errno(fd)));
  222. }
  223. goto end;
  224. }
  225. tor_assert(question_type == CPUWORKER_TASK_ONION);
  226. if (read_all(fd, tag, TAG_LEN, 1) != TAG_LEN) {
  227. log_fn(LOG_ERR,"read tag failed. Exiting.");
  228. goto end;
  229. }
  230. if (read_all(fd, question, ONIONSKIN_CHALLENGE_LEN, 1) != ONIONSKIN_CHALLENGE_LEN) {
  231. log_fn(LOG_ERR,"read question failed. Exiting.");
  232. goto end;
  233. }
  234. if (question_type == CPUWORKER_TASK_ONION) {
  235. if (onion_skin_server_handshake(question, onion_key, last_onion_key,
  236. reply_to_proxy, keys, CPATH_KEY_MATERIAL_LEN) < 0) {
  237. /* failure */
  238. log_fn(LOG_INFO,"onion_skin_server_handshake failed.");
  239. memset(buf,0,LEN_ONION_RESPONSE); /* send all zeros for failure */
  240. } else {
  241. /* success */
  242. log_fn(LOG_DEBUG,"onion_skin_server_handshake succeeded.");
  243. buf[0] = 1; /* 1 means success */
  244. memcpy(buf+1,tag,TAG_LEN);
  245. memcpy(buf+1+TAG_LEN,reply_to_proxy,ONIONSKIN_REPLY_LEN);
  246. memcpy(buf+1+TAG_LEN+ONIONSKIN_REPLY_LEN,keys,CPATH_KEY_MATERIAL_LEN);
  247. }
  248. if (write_all(fd, buf, LEN_ONION_RESPONSE, 1) != LEN_ONION_RESPONSE) {
  249. log_fn(LOG_ERR,"writing response buf failed. Exiting.");
  250. goto end;
  251. }
  252. log_fn(LOG_DEBUG,"finished writing response.");
  253. }
  254. }
  255. end:
  256. if (onion_key)
  257. crypto_free_pk_env(onion_key);
  258. if (last_onion_key)
  259. crypto_free_pk_env(last_onion_key);
  260. tor_close_socket(fd);
  261. spawn_exit();
  262. return 0; /* windows wants this function to return an int */
  263. }
  264. /** Launch a new cpuworker.
  265. */
  266. static int
  267. spawn_cpuworker(void)
  268. {
  269. int *fdarray;
  270. int fd;
  271. connection_t *conn;
  272. fdarray = tor_malloc(sizeof(int)*2);
  273. if (tor_socketpair(AF_UNIX, SOCK_STREAM, 0, fdarray) < 0) {
  274. log(LOG_ERR, "Couldn't construct socketpair: %s",
  275. tor_socket_strerror(tor_socket_errno(-1)));
  276. tor_cleanup();
  277. tor_free(fdarray);
  278. exit(1);
  279. }
  280. fd = fdarray[0];
  281. spawn_func(cpuworker_main, (void*)fdarray);
  282. log_fn(LOG_DEBUG,"just spawned a worker.");
  283. #ifndef TOR_IS_MULTITHREADED
  284. tor_close_socket(fdarray[1]); /* we don't need the worker's side of the pipe */
  285. tor_free(fdarray);
  286. #endif
  287. conn = connection_new(CONN_TYPE_CPUWORKER);
  288. set_socket_nonblocking(fd);
  289. /* set up conn so it's got all the data we need to remember */
  290. conn->s = fd;
  291. conn->address = tor_strdup("localhost");
  292. if (connection_add(conn) < 0) { /* no space, forget it */
  293. log_fn(LOG_WARN,"connection_add failed. Giving up.");
  294. connection_free(conn); /* this closes fd */
  295. return -1;
  296. }
  297. conn->state = CPUWORKER_STATE_IDLE;
  298. connection_start_reading(conn);
  299. return 0; /* success */
  300. }
  301. /** If we have too few or too many active cpuworkers, try to spawn new ones
  302. * or kill idle ones.
  303. */
  304. static void
  305. spawn_enough_cpuworkers(void)
  306. {
  307. int num_cpuworkers_needed = get_options()->NumCpus;
  308. if (num_cpuworkers_needed < MIN_CPUWORKERS)
  309. num_cpuworkers_needed = MIN_CPUWORKERS;
  310. if (num_cpuworkers_needed > MAX_CPUWORKERS)
  311. num_cpuworkers_needed = MAX_CPUWORKERS;
  312. while (num_cpuworkers < num_cpuworkers_needed) {
  313. if (spawn_cpuworker() < 0) {
  314. log_fn(LOG_WARN,"spawn failed!");
  315. return;
  316. }
  317. num_cpuworkers++;
  318. }
  319. }
  320. /** Take a pending task from the queue and assign it to 'cpuworker'. */
  321. static void
  322. process_pending_task(connection_t *cpuworker)
  323. {
  324. circuit_t *circ;
  325. tor_assert(cpuworker);
  326. /* for now only process onion tasks */
  327. circ = onion_next_task();
  328. if (!circ)
  329. return;
  330. if (assign_to_cpuworker(cpuworker, CPUWORKER_TASK_ONION, circ) < 0)
  331. log_fn(LOG_WARN,"assign_to_cpuworker failed. Ignoring.");
  332. }
  333. #define CPUWORKER_BUSY_TIMEOUT 3600 /* seconds */
  334. /** We have a bug that I can't find. Sometimes, very rarely, cpuworkers
  335. * get stuck in the 'busy' state, even though the cpuworker process
  336. * thinks of itself as idle. I don't know why. But here's a workaround
  337. * to kill any cpuworker that's been busy for more than 3600 seconds. */
  338. static void
  339. cull_wedged_cpuworkers(void)
  340. {
  341. connection_t **carray;
  342. connection_t *conn;
  343. int n_conns, i;
  344. time_t now = time(NULL);
  345. get_connection_array(&carray, &n_conns);
  346. for (i = 0; i < n_conns; ++i) {
  347. conn = carray[i];
  348. if (!conn->marked_for_close &&
  349. conn->type == CONN_TYPE_CPUWORKER &&
  350. conn->state == CPUWORKER_STATE_BUSY_ONION &&
  351. conn->timestamp_lastwritten + CPUWORKER_BUSY_TIMEOUT < now) {
  352. log_fn(LOG_NOTICE,"Bug: closing wedged cpuworker. Can somebody find the bug?");
  353. num_cpuworkers_busy--;
  354. num_cpuworkers--;
  355. connection_mark_for_close(conn);
  356. }
  357. }
  358. }
  359. /** If cpuworker is defined, assert that he's idle, and use him. Else,
  360. * look for an idle cpuworker and use him. If none idle, queue task onto
  361. * the pending onion list and return.
  362. * If question_type is CPUWORKER_TASK_ONION then task is a circ.
  363. * No other question_types are allowed.
  364. */
  365. int
  366. assign_to_cpuworker(connection_t *cpuworker, uint8_t question_type,
  367. void *task)
  368. {
  369. circuit_t *circ;
  370. char tag[TAG_LEN];
  371. tor_assert(question_type == CPUWORKER_TASK_ONION);
  372. cull_wedged_cpuworkers();
  373. spawn_enough_cpuworkers();
  374. if (question_type == CPUWORKER_TASK_ONION) {
  375. circ = task;
  376. if (num_cpuworkers_busy == num_cpuworkers) {
  377. log_fn(LOG_DEBUG,"No idle cpuworkers. Queuing.");
  378. if (onion_pending_add(circ) < 0)
  379. return -1;
  380. return 0;
  381. }
  382. if (!cpuworker)
  383. cpuworker = connection_get_by_type_state(CONN_TYPE_CPUWORKER, CPUWORKER_STATE_IDLE);
  384. tor_assert(cpuworker);
  385. if (!circ->p_conn) {
  386. log_fn(LOG_INFO,"circ->p_conn gone. Failing circ.");
  387. return -1;
  388. }
  389. tag_pack(tag, circ->p_conn->addr, circ->p_conn->port, circ->p_circ_id);
  390. cpuworker->state = CPUWORKER_STATE_BUSY_ONION;
  391. num_cpuworkers_busy++;
  392. connection_write_to_buf((char*)&question_type, 1, cpuworker);
  393. connection_write_to_buf(tag, sizeof(tag), cpuworker);
  394. connection_write_to_buf(circ->onionskin, ONIONSKIN_CHALLENGE_LEN, cpuworker);
  395. }
  396. return 0;
  397. }