  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_ipc_helper.c
  15. *
  16. * This file contains code to create an IPC helper thread inside library OS and maintain bookkeeping
  17. * of IPC ports.
  18. */
  19. #include <list.h>
  20. #include <pal.h>
  21. #include <pal_error.h>
  22. #include <shim_checkpoint.h>
  23. #include <shim_handle.h>
  24. #include <shim_internal.h>
  25. #include <shim_ipc.h>
  26. #include <shim_profile.h>
  27. #include <shim_thread.h>
  28. #include <shim_utils.h>
  29. #define IPC_HELPER_STACK_SIZE (g_pal_alloc_align * 4)
  30. static struct shim_lock ipc_port_mgr_lock;
  31. #define SYSTEM_LOCK() lock(&ipc_port_mgr_lock)
  32. #define SYSTEM_UNLOCK() unlock(&ipc_port_mgr_lock)
  33. #define SYSTEM_LOCKED() locked(&ipc_port_mgr_lock)
  34. #define PORT_MGR_ALLOC 32
  35. #define OBJ_TYPE struct shim_ipc_port
  36. #include "memmgr.h"
  37. static MEM_MGR port_mgr;
  38. DEFINE_LISTP(shim_ipc_port);
  39. static LISTP_TYPE(shim_ipc_port) port_list;
  40. static enum { HELPER_NOTALIVE, HELPER_ALIVE } ipc_helper_state;
  41. static struct shim_thread* ipc_helper_thread;
  42. static struct shim_lock ipc_helper_lock;
  43. static AEVENTTYPE install_new_event;
  44. static int create_ipc_helper(void);
  45. static int ipc_resp_callback(struct shim_ipc_msg* msg, struct shim_ipc_port* port);
/* Dispatch table of IPC message handlers, indexed by the message code (msg->code).
 * receive_ipc_message() calls ipc_callbacks[msg->code], so the order of entries must match the
 * numbering of IPC codes exactly (IPC_CODE_NUM entries overall, including those expanded from
 * the IPC_NS_CALLBACKS/IPC_NS_KEY_CALLBACKS macros). */
static ipc_callback ipc_callbacks[IPC_CODE_NUM] = {
    /* RESP */ &ipc_resp_callback,
    /* CHECKPOINT */ &ipc_checkpoint_callback,

    /* parents and children */
    /* CLD_EXIT */ &ipc_cld_exit_callback,
#ifdef PROFILE
    /* CLD_PROFILE */ &ipc_cld_profile_callback,
#endif

    /* pid namespace */
    IPC_NS_CALLBACKS(pid)

    /* PID_KILL */ &ipc_pid_kill_callback,
    /* PID_GETSTATUS */ &ipc_pid_getstatus_callback,
    /* PID_RETSTATUS */ &ipc_pid_retstatus_callback,
    /* PID_GETMETA */ &ipc_pid_getmeta_callback,
    /* PID_RETMETA */ &ipc_pid_retmeta_callback,
    /* PID_NOP */ &ipc_pid_nop_callback,
    /* PID_SENDRPC */ &ipc_pid_sendrpc_callback,

    /* sysv namespace */
    IPC_NS_CALLBACKS(sysv)
    IPC_NS_KEY_CALLBACKS(sysv)

    /* SYSV_DELRES */ &ipc_sysv_delres_callback,
    /* SYSV_MOVRES */ &ipc_sysv_movres_callback,
    /* SYSV_MSGSND */ &ipc_sysv_msgsnd_callback,
    /* SYSV_MSGRCV */ &ipc_sysv_msgrcv_callback,
    /* SYSV_MSGMOV */ &ipc_sysv_msgmov_callback,
    /* SYSV_SEMOP */ &ipc_sysv_semop_callback,
    /* SYSV_SEMCTL */ &ipc_sysv_semctl_callback,
    /* SYSV_SEMRET */ &ipc_sysv_semret_callback,
    /* SYSV_SEMMOV */ &ipc_sysv_semmov_callback,
};
  76. static int init_self_ipc_port(void) {
  77. lock(&cur_process.lock);
  78. if (!cur_process.self) {
  79. /* very first process or clone/fork case: create IPC port from scratch */
  80. cur_process.self = create_ipc_info_cur_process(/*is_self_ipc_info=*/true);
  81. if (!cur_process.self) {
  82. unlock(&cur_process.lock);
  83. return -EACCES;
  84. }
  85. } else {
  86. /* execve case: inherited IPC port from parent process */
  87. assert(cur_process.self->pal_handle && !qstrempty(&cur_process.self->uri));
  88. add_ipc_port_by_id(cur_process.self->vmid, cur_process.self->pal_handle, IPC_PORT_SERVER,
  89. /*fini=*/NULL, &cur_process.self->port);
  90. }
  91. unlock(&cur_process.lock);
  92. return 0;
  93. }
  94. static int init_parent_ipc_port(void) {
  95. if (!PAL_CB(parent_process) || !cur_process.parent) {
  96. /* no parent process, no sense in creating parent IPC port */
  97. return 0;
  98. }
  99. lock(&cur_process.lock);
  100. assert(cur_process.parent && cur_process.parent->vmid);
  101. /* for execve case, my parent is the parent of my parent (current process transparently inherits
  102. * the "real" parent through already opened pal_handle on "temporary" parent's
  103. * cur_process.parent) */
  104. if (!cur_process.parent->pal_handle) {
  105. /* for clone/fork case, parent is connected on parent_process */
  106. cur_process.parent->pal_handle = PAL_CB(parent_process);
  107. }
  108. add_ipc_port_by_id(cur_process.parent->vmid, cur_process.parent->pal_handle,
  109. IPC_PORT_DIRPRT | IPC_PORT_LISTEN,
  110. /*fini=*/NULL, &cur_process.parent->port);
  111. unlock(&cur_process.lock);
  112. return 0;
  113. }
  114. static int init_ns_ipc_port(int ns_idx) {
  115. if (!cur_process.ns[ns_idx]) {
  116. /* no NS info from parent process, no sense in creating NS IPC port */
  117. return 0;
  118. }
  119. if (!cur_process.ns[ns_idx]->pal_handle && qstrempty(&cur_process.ns[ns_idx]->uri)) {
  120. /* there is no connection to NS leader via PAL handle and there is no URI to find NS leader:
  121. * do not create NS IPC port now, it will be created on-demand during NS leader lookup */
  122. return 0;
  123. }
  124. lock(&cur_process.lock);
  125. if (!cur_process.ns[ns_idx]->pal_handle) {
  126. debug("Reconnecting IPC port %s\n", qstrgetstr(&cur_process.ns[ns_idx]->uri));
  127. cur_process.ns[ns_idx]->pal_handle =
  128. DkStreamOpen(qstrgetstr(&cur_process.ns[ns_idx]->uri), 0, 0, 0, 0);
  129. if (!cur_process.ns[ns_idx]->pal_handle) {
  130. unlock(&cur_process.lock);
  131. return -PAL_ERRNO;
  132. }
  133. }
  134. IDTYPE type = (ns_idx == PID_NS) ? IPC_PORT_PIDLDR : IPC_PORT_SYSVLDR;
  135. add_ipc_port_by_id(cur_process.ns[ns_idx]->vmid, cur_process.ns[ns_idx]->pal_handle,
  136. type | IPC_PORT_LISTEN,
  137. /*fini=*/NULL, &cur_process.ns[ns_idx]->port);
  138. unlock(&cur_process.lock);
  139. return 0;
  140. }
  141. int init_ipc_ports(void) {
  142. if (!create_lock(&ipc_port_mgr_lock)) {
  143. return -ENOMEM;
  144. }
  145. if (!(port_mgr = create_mem_mgr(init_align_up(PORT_MGR_ALLOC))))
  146. return -ENOMEM;
  147. int ret;
  148. if ((ret = init_self_ipc_port()) < 0)
  149. return ret;
  150. if ((ret = init_parent_ipc_port()) < 0)
  151. return ret;
  152. if ((ret = init_ns_ipc_port(PID_NS)) < 0)
  153. return ret;
  154. if ((ret = init_ns_ipc_port(SYSV_NS)) < 0)
  155. return ret;
  156. return 0;
  157. }
/* Initializes IPC helper state and spawns the IPC helper thread.
 * Returns 0 on success, negative errno on failure. */
int init_ipc_helper(void) {
    /* early enough in init, can write global vars without the lock */
    ipc_helper_state = HELPER_NOTALIVE;
    if (!create_lock(&ipc_helper_lock)) {
        return -ENOMEM;
    }
    create_event(&install_new_event);

    /* some IPC ports were already added before this point, so spawn IPC helper thread (and enable
     * locking mechanisms if not done already since we are going in multi-threaded mode) */
    enable_locking();

    lock(&ipc_helper_lock);
    int ret = create_ipc_helper();
    unlock(&ipc_helper_lock);
    return ret;
}
  173. static struct shim_ipc_port* __create_ipc_port(PAL_HANDLE hdl) {
  174. struct shim_ipc_port* port =
  175. get_mem_obj_from_mgr_enlarge(port_mgr, size_align_up(PORT_MGR_ALLOC));
  176. if (!port)
  177. return NULL;
  178. memset(port, 0, sizeof(struct shim_ipc_port));
  179. port->pal_handle = hdl;
  180. INIT_LIST_HEAD(port, list);
  181. INIT_LISTP(&port->msgs);
  182. REF_SET(port->ref_count, 0);
  183. if (!create_lock(&port->msgs_lock)) {
  184. free_mem_obj_to_mgr(port_mgr, port);
  185. return NULL;
  186. }
  187. return port;
  188. }
  189. static void __free_ipc_port(struct shim_ipc_port* port) {
  190. assert(locked(&ipc_helper_lock));
  191. if (port->pal_handle) {
  192. DkObjectClose(port->pal_handle);
  193. port->pal_handle = NULL;
  194. }
  195. destroy_lock(&port->msgs_lock);
  196. free_mem_obj_to_mgr(port_mgr, port);
  197. }
/* Takes one reference on `port` (bare REF_INC; touches only the port's own counter). */
static void __get_ipc_port(struct shim_ipc_port* port) {
    REF_INC(port->ref_count);
}
  201. static void __put_ipc_port(struct shim_ipc_port* port) {
  202. assert(locked(&ipc_helper_lock));
  203. int ref_count = REF_DEC(port->ref_count);
  204. if (!ref_count)
  205. __free_ipc_port(port);
  206. }
/* Public wrapper: takes one reference on `port`. */
void get_ipc_port(struct shim_ipc_port* port) {
    /* no need to grab ipc_helper_lock because __get_ipc_port() does not touch global state */
    __get_ipc_port(port);
}
  211. void put_ipc_port(struct shim_ipc_port* port) {
  212. /* this is atomic so we don't grab lock in common case of ref_count > 0 */
  213. int ref_count = REF_DEC(port->ref_count);
  214. if (!ref_count) {
  215. lock(&ipc_helper_lock);
  216. __free_ipc_port(port);
  217. unlock(&ipc_helper_lock);
  218. }
  219. }
/* Registers (or updates) `port`: merges `type` bits, records `vmid` (only if not set yet),
 * registers the `fini` callback, and links the port onto the global `port_list` if it is not
 * there already. Caller must hold ipc_helper_lock. */
static void __add_ipc_port(struct shim_ipc_port* port, IDTYPE vmid, IDTYPE type, port_fini fini) {
    assert(locked(&ipc_helper_lock));

    port->type |= type;
    if (vmid && !port->vmid)
        port->vmid = vmid;

    /* find empty slot in fini callbacks and register callback; re-registering the same callback
     * reuses its existing slot, so this is idempotent */
    if (fini) {
        bool found_empty_slot = false;
        __UNUSED(found_empty_slot); /* only read by the assert() below */
        for (int i = 0; i < MAX_IPC_PORT_FINI_CB; i++)
            if (!port->fini[i] || port->fini[i] == fini) {
                port->fini[i] = fini;
                found_empty_slot = true;
                break;
            }
        assert(found_empty_slot);
    }

    /* add to port list if not there already; the list keeps its own reference on the port */
    if (LIST_EMPTY(port, list)) {
        __get_ipc_port(port);
        LISTP_ADD(port, &port_list, list);
    }

    /* wake up IPC helper thread so that it picks up added port */
    if (ipc_helper_state == HELPER_ALIVE)
        set_event(&install_new_event, 1);
}
/* Unlinks `port` from the global list, deletes its underlying PAL stream, fails all pending
 * duplex messages with -ECONNRESET (waking any threads blocked on them), and drops the list's
 * reference (which may free the port). Caller must hold ipc_helper_lock. */
static void __del_ipc_port(struct shim_ipc_port* port) {
    assert(locked(&ipc_helper_lock));

    debug("Deleting port %p (handle %p) of process %u\n", port, port->pal_handle,
          port->vmid & 0xFFFF);

    DkStreamDelete(port->pal_handle, 0);
    LISTP_DEL_INIT(port, &port_list, list);

    /* Check for pending messages on port (threads might be blocking for responses) */
    lock(&port->msgs_lock);
    struct shim_ipc_msg_duplex* msg;
    struct shim_ipc_msg_duplex* tmp;
    LISTP_FOR_EACH_ENTRY_SAFE(msg, tmp, &port->msgs, list) {
        LISTP_DEL_INIT(msg, &port->msgs, list);
        msg->retval = -ECONNRESET;
        if (msg->thread) {
            debug("Deleted pending message on port %p, wake up blocking thread %d\n", port,
                  msg->thread->tid);
            thread_wakeup(msg->thread);
        }
    }
    unlock(&port->msgs_lock);

    /* drop the reference that port_list held */
    __put_ipc_port(port);

    /* wake up IPC helper thread so that it forgets about deleted port */
    if (ipc_helper_state == HELPER_ALIVE)
        set_event(&install_new_event, 1);
}
/* Public wrapper over __add_ipc_port(): registers `port` (type bits, vmid, fini callback) and
 * links it onto the global port list, under ipc_helper_lock. */
void add_ipc_port(struct shim_ipc_port* port, IDTYPE vmid, IDTYPE type, port_fini fini) {
    debug("Adding port %p (handle %p) for process %u (type=%04x)\n", port, port->pal_handle,
          port->vmid & 0xFFFF, type);

    lock(&ipc_helper_lock);
    __add_ipc_port(port, vmid, type, fini);
    unlock(&ipc_helper_lock);
}
  278. void add_ipc_port_by_id(IDTYPE vmid, PAL_HANDLE hdl, IDTYPE type, port_fini fini,
  279. struct shim_ipc_port** portptr) {
  280. debug("Adding port (handle %p) for process %u (type %04x)\n", hdl, vmid & 0xFFFF, type);
  281. struct shim_ipc_port* port = NULL;
  282. if (portptr)
  283. *portptr = NULL;
  284. assert(hdl);
  285. lock(&ipc_helper_lock);
  286. /* check if port with this PAL handle already exists, then we only need to update its vmid,
  287. * type, and fini callback */
  288. struct shim_ipc_port* tmp;
  289. LISTP_FOR_EACH_ENTRY(tmp, &port_list, list) {
  290. if (tmp->pal_handle == hdl) {
  291. port = tmp;
  292. break;
  293. }
  294. }
  295. if (!port) {
  296. /* port does not yet exist, create it */
  297. port = __create_ipc_port(hdl);
  298. if (!port) {
  299. debug("Failed to create IPC port for handle %p\n", hdl);
  300. goto out;
  301. }
  302. }
  303. /* add/update port */
  304. __add_ipc_port(port, vmid, type, fini);
  305. if (portptr) {
  306. __get_ipc_port(port);
  307. *portptr = port;
  308. }
  309. out:
  310. unlock(&ipc_helper_lock);
  311. }
/* Removes `port` from the global list and invokes all registered fini callbacks with
 * `exitcode`. No-op if the port is not currently on the list. */
void del_ipc_port_fini(struct shim_ipc_port* port, unsigned int exitcode) {
    lock(&ipc_helper_lock);

    if (LIST_EMPTY(port, list)) {
        unlock(&ipc_helper_lock);
        return;
    }

    /* prevent __del_ipc_port() from freeing port since we need it for fini callbacks */
    __get_ipc_port(port);
    __del_ipc_port(port);
    unlock(&ipc_helper_lock);

    /* NOTE(review): port->fini[] is read and cleared here without ipc_helper_lock; this looks
     * safe only as long as no other thread re-adds the same port concurrently — confirm. */
    for (int i = 0; i < MAX_IPC_PORT_FINI_CB; i++)
        if (port->fini[i]) {
            (port->fini[i])(port, port->vmid, exitcode);
            port->fini[i] = NULL;
        }

    /* release the temporary reference taken above (may free the port) */
    put_ipc_port(port);
}
  329. void del_all_ipc_ports(void) {
  330. lock(&ipc_helper_lock);
  331. struct shim_ipc_port* port;
  332. struct shim_ipc_port* tmp;
  333. LISTP_FOR_EACH_ENTRY_SAFE(port, tmp, &port_list, list) {
  334. __del_ipc_port(port);
  335. }
  336. unlock(&ipc_helper_lock);
  337. }
  338. struct shim_ipc_port* lookup_ipc_port(IDTYPE vmid, IDTYPE type) {
  339. struct shim_ipc_port* port = NULL;
  340. assert(vmid && type);
  341. lock(&ipc_helper_lock);
  342. struct shim_ipc_port* tmp;
  343. LISTP_FOR_EACH_ENTRY(tmp, &port_list, list) {
  344. if (tmp->vmid == vmid && (tmp->type & type)) {
  345. debug("Found port %p (handle %p) for process %u (type %04x)\n", tmp, tmp->pal_handle,
  346. tmp->vmid & 0xFFFF, tmp->type);
  347. port = tmp;
  348. __get_ipc_port(port);
  349. break;
  350. }
  351. }
  352. unlock(&ipc_helper_lock);
  353. return port;
  354. }
#define PORTS_ON_STACK_CNT 32

/* Sends `msg` to every registered port whose type intersects `target_type`, except
 * `exclude_port`. References to all target ports are taken before the lock is dropped, so ports
 * cannot be freed mid-broadcast. Returns 0 on success or the first negative error from
 * send_ipc_message(). */
int broadcast_ipc(struct shim_ipc_msg* msg, int target_type, struct shim_ipc_port* exclude_port) {
    int ret;
    struct shim_ipc_port* port;
    struct shim_ipc_port** target_ports;
    size_t target_ports_cnt = 0;

    assert(target_type);
    lock(&ipc_helper_lock);

    /* Collect all ports with appropriate types. In common case, stack-allocated array of
     * PORTS_ON_STACK_CNT ports is enough. If there are more ports, we will allocate a bigger array
     * on the heap and collect all ports again. */
    struct shim_ipc_port* target_ports_stack[PORTS_ON_STACK_CNT];
    LISTP_FOR_EACH_ENTRY(port, &port_list, list) {
        if (port == exclude_port)
            continue;
        if (port->type & target_type) {
            /* past PORTS_ON_STACK_CNT we only keep counting; the overflow entries are collected
             * again into the heap array below */
            if (target_ports_cnt < PORTS_ON_STACK_CNT)
                target_ports_stack[target_ports_cnt] = port;
            target_ports_cnt++;
        }
    }
    target_ports = target_ports_stack;

    if (target_ports_cnt > PORTS_ON_STACK_CNT) {
        /* Rare case when there are more than PORTS_ON_STACK_CNT ports. Allocate big-enough array on
         * the heap and collect all ports again. */
        size_t cnt = 0;
        struct shim_ipc_port** target_ports_heap =
            malloc(sizeof(struct shim_ipc_port*) * target_ports_cnt);
        if (!target_ports_heap) {
            unlock(&ipc_helper_lock);
            debug("Allocation of target_ports_heap failed\n");
            return -ENOMEM;
        }

        LISTP_FOR_EACH_ENTRY(port, &port_list, list) {
            if (port == exclude_port)
                continue;
            if (port->type & target_type)
                target_ports_heap[cnt++] = port;
        }
        target_ports = target_ports_heap;
        /* the list did not change since the first pass (lock still held) */
        assert(cnt == target_ports_cnt);
    }

    /* pin every target port before releasing the lock */
    for (size_t i = 0; i < target_ports_cnt; i++)
        __get_ipc_port(target_ports[i]);

    unlock(&ipc_helper_lock);

    /* send msg to each collected port (note that ports cannot be freed in meantime) */
    for (size_t i = 0; i < target_ports_cnt; i++) {
        port = target_ports[i];

        debug("Broadcast to port %p (handle %p) for process %u (type %x, target %x)\n",
              port, port->pal_handle, port->vmid & 0xFFFF, port->type, target_type);

        msg->dst = port->vmid;
        ret = send_ipc_message(msg, port);
        if (ret < 0) {
            debug("Broadcast to port %p (handle %p) for process %u failed (errno = %d)!\n",
                  port, port->pal_handle, port->vmid & 0xFFFF, ret);
            goto out;
        }
    }

    ret = 0;
out:
    /* unpin all target ports (and free the heap array if one was used) */
    for (size_t i = 0; i < target_ports_cnt; i++)
        put_ipc_port(target_ports[i]);

    if (target_ports != target_ports_stack)
        free(target_ports);

    return ret;
}
  421. static int ipc_resp_callback(struct shim_ipc_msg* msg, struct shim_ipc_port* port) {
  422. struct shim_ipc_resp* resp = (struct shim_ipc_resp*)&msg->msg;
  423. debug("IPC callback from %u: IPC_RESP(%d)\n", msg->src & 0xFFFF, resp->retval);
  424. if (!msg->seq)
  425. return resp->retval;
  426. /* find a corresponding request msg for this response msg */
  427. struct shim_ipc_msg_duplex* req_msg = pop_ipc_msg_duplex(port, msg->seq);
  428. /* if some thread is waiting for response, wake it with response retval */
  429. if (req_msg) {
  430. req_msg->retval = resp->retval;
  431. if (req_msg->thread)
  432. thread_wakeup(req_msg->thread);
  433. return 0;
  434. }
  435. return resp->retval;
  436. }
  437. int send_response_ipc_message(struct shim_ipc_port* port, IDTYPE dest, int ret, unsigned long seq) {
  438. ret = (ret == RESPONSE_CALLBACK) ? 0 : ret;
  439. /* create IPC_RESP msg to send to dest, with sequence number seq, and in-body retval ret */
  440. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_resp));
  441. struct shim_ipc_msg* resp_msg = __alloca(total_msg_size);
  442. init_ipc_msg(resp_msg, IPC_RESP, total_msg_size, dest);
  443. resp_msg->seq = seq;
  444. struct shim_ipc_resp* resp = (struct shim_ipc_resp*)resp_msg->msg;
  445. resp->retval = ret;
  446. debug("IPC send to %u: IPC_RESP(%d)\n", resp_msg->dst & 0xFFFF, ret);
  447. return send_ipc_message(resp_msg, port);
  448. }
/* Reads and dispatches all IPC messages currently pending on `port`. The receive buffer grows
 * geometrically to fit whole messages; reads deliberately overshoot by `readahead` bytes, so
 * the tail of one read may already contain the start of the next message, which is then moved
 * to the front of the buffer and handled on the next loop iteration.
 * Returns 0 on success, negative errno on failure. */
static int receive_ipc_message(struct shim_ipc_port* port) {
    int ret;
    size_t readahead = IPC_MSG_MINIMAL_SIZE * 2;
    size_t bufsize = IPC_MSG_MINIMAL_SIZE + readahead;

    struct shim_ipc_msg* msg = malloc(bufsize);
    if (!msg) {
        return -ENOMEM;
    }

    /* until the header is read, assume the minimal size; msg->size gives the real size later */
    size_t expected_size = IPC_MSG_MINIMAL_SIZE;
    size_t bytes = 0;

    do {
        while (bytes < expected_size) {
            /* grow msg buffer to accomodate bigger messages */
            if (expected_size + readahead > bufsize) {
                while (expected_size + readahead > bufsize)
                    bufsize *= 2;
                void* tmp_buf = malloc(bufsize);
                if (!tmp_buf) {
                    ret = -ENOMEM;
                    goto out;
                }
                memcpy(tmp_buf, msg, bytes);
                free(msg);
                msg = tmp_buf;
            }

            PAL_NUM read =
                DkStreamRead(port->pal_handle, /*offset=*/0, expected_size - bytes + readahead,
                             (void*)msg + bytes, NULL, 0);

            if (read == PAL_STREAM_ERROR) {
                /* transient errors: just retry the read */
                if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN || PAL_ERRNO == EWOULDBLOCK)
                    continue;

                debug("Port %p (handle %p) closed while receiving IPC message\n", port,
                      port->pal_handle);
                del_ipc_port_fini(port, -ECHILD);
                ret = -PAL_ERRNO;
                goto out;
            }

            /* NOTE(review): a 0-byte read is not treated specially; if the stream can report EOF
             * as read == 0 (rather than PAL_STREAM_ERROR) this loop would spin — confirm. */
            bytes += read;

            /* extract actual msg size from msg header and continue reading msg body */
            if (bytes >= IPC_MSG_MINIMAL_SIZE)
                expected_size = msg->size;
        }

        debug(
            "Received IPC message from port %p (handle %p): code=%d size=%lu "
            "src=%u dst=%u seq=%lx\n",
            port, port->pal_handle, msg->code, msg->size, msg->src & 0xFFFF, msg->dst & 0xFFFF,
            msg->seq);

        /* skip messages coming from myself (in case of broadcast) */
        if (msg->src != cur_process.vmid) {
            if (msg->code < IPC_CODE_NUM && ipc_callbacks[msg->code]) {
                /* invoke callback to this msg */
                ret = (*ipc_callbacks[msg->code])(msg, port);
                if ((ret < 0 || ret == RESPONSE_CALLBACK) && msg->seq) {
                    /* send IPC_RESP message to sender of this msg */
                    ret = send_response_ipc_message(port, msg->src, ret, msg->seq);
                    if (ret < 0) {
                        debug("Sending IPC_RESP msg on port %p (handle %p) to %u failed\n", port,
                              port->pal_handle, msg->src & 0xFFFF);
                        /* NOTE(review): ret already holds the error from
                         * send_response_ipc_message(); overwriting with -PAL_ERRNO may pick up a
                         * stale errno — confirm intended. */
                        ret = -PAL_ERRNO;
                        goto out;
                    }
                }
            }
        }

        bytes -= expected_size; /* one message was received and handled */

        if (bytes > 0) {
            /* we may have started reading the next message, move this message to beginning of msg
             * buffer and reset expected size */
            memmove(msg, (void*)msg + expected_size, bytes);
            expected_size = IPC_MSG_MINIMAL_SIZE;
            if (bytes >= IPC_MSG_MINIMAL_SIZE)
                expected_size = msg->size;
        }
    } while (bytes > 0);

    ret = 0;
out:
    free(msg);
    return ret;
}
  528. /* Main routine of the IPC helper thread. IPC helper thread is spawned when the first IPC port is
  529. * added and is terminated only when the whole Graphene application terminates. IPC helper thread
  530. * runs in an endless loop and waits on port events (either the addition/removal of ports or actual
  531. * port events: acceptance of new client or receiving/sending messages). In particular, IPC helper
  532. * thread calls receive_ipc_message() if a message arrives on port.
  533. *
  534. * Other threads add and remove IPC ports via add_ipc_xxx() and del_ipc_xxx() functions. These ports
  535. * are added to port_list which the IPC helper thread consults before each DkStreamsWaitEvents().
  536. *
  537. * Note that ports are copied from global port_list to local object_list. This is because ports may
  538. * be removed from port_list by other threads while IPC helper thread is waiting on
DkStreamsWaitEvents(). For this reason the IPC thread also gets references to all current ports and
  540. * puts them after handling all ports in object_list.
  541. *
  542. * Previous implementation went to great lengths to keep changes to the list of current ports to a
  543. * minimum (instead of repopulating the list before each wait like in current code). Unfortunately,
  544. * this resulted in undue complexity. Current implementation should perform fine for usual case of
  545. * <100 IPC ports and with IPC helper thread always running in background on its own core.
  546. */
/* Entry point of the IPC helper thread (runs on its own stack, see shim_ipc_helper_prepare()).
 * Loops forever: repopulates the local `ports`/`pals` arrays from the global `port_list`, waits
 * on all port handles plus install_new_event, and dispatches accepts/receives/disconnects.
 * Exits only after terminate_ipc_helper() resets ipc_helper_state. */
noreturn static void shim_ipc_helper(void* dummy) {
    __UNUSED(dummy);
    struct shim_thread* self = get_cur_thread();

    /* Initialize two lists:
     * - `ports` collects IPC port objects and is the main list we process here
     * - `pals` collects PAL handles of IPC port objects; always contains install_new_event */
    size_t ports_cnt = 0;
    size_t ports_max_cnt = 32;
    struct shim_ipc_port** ports = malloc(sizeof(*ports) * ports_max_cnt);
    if (!ports) {
        debug("shim_ipc_helper: allocation of ports failed\n");
        goto out_err;
    }
    PAL_HANDLE* pals = malloc(sizeof(*pals) * (1 + ports_max_cnt));
    if (!pals) {
        debug("shim_ipc_helper: allocation of pals failed\n");
        goto out_err;
    }

    /* allocate one memory region to hold two PAL_FLG arrays: events and revents */
    PAL_FLG* pal_events = malloc(sizeof(*pal_events) * (1 + ports_max_cnt) * 2);
    if (!pal_events) {
        debug("shim_ipc_helper: allocation of pal_events failed\n");
        goto out_err;
    }
    PAL_FLG* ret_events = pal_events + 1 + ports_max_cnt;

    /* slot 0 in pals/events is reserved for the "port set changed" wakeup event */
    PAL_HANDLE install_new_event_pal = event_handle(&install_new_event);
    pals[0] = install_new_event_pal;
    pal_events[0] = PAL_WAIT_READ;
    ret_events[0] = 0;

    while (true) {
        lock(&ipc_helper_lock);
        if (ipc_helper_state != HELPER_ALIVE) {
            /* terminate_ipc_helper() asked us to stop */
            ipc_helper_thread = NULL;
            unlock(&ipc_helper_lock);
            break;
        }

        /* iterate through all known ports from `port_list` to repopulate `ports` */
        ports_cnt = 0;
        struct shim_ipc_port* port;
        struct shim_ipc_port* tmp;
        LISTP_FOR_EACH_ENTRY_SAFE(port, tmp, &port_list, list) {
            /* get port reference so it is not freed while we wait on/handle it */
            __get_ipc_port(port);

            if (ports_cnt == ports_max_cnt) {
                /* grow `ports` and `pals` to accommodate more objects */
                struct shim_ipc_port** tmp_ports = malloc(sizeof(*tmp_ports) * ports_max_cnt * 2);
                if (!tmp_ports) {
                    debug("shim_ipc_helper: allocation of tmp_ports failed\n");
                    goto out_err_unlock;
                }
                PAL_HANDLE* tmp_pals = malloc(sizeof(*tmp_pals) * (1 + ports_max_cnt * 2));
                if (!tmp_pals) {
                    debug("shim_ipc_helper: allocation of tmp_pals failed\n");
                    goto out_err_unlock;
                }
                /* as above, one region holding both the events and revents arrays */
                PAL_FLG* tmp_pal_events = malloc(sizeof(*tmp_pal_events) * (2 + ports_max_cnt * 4));
                if (!tmp_pal_events) {
                    debug("shim_ipc_helper: allocation of tmp_pal_events failed\n");
                    goto out_err_unlock;
                }
                PAL_FLG* tmp_ret_events = tmp_pal_events + 1 + ports_max_cnt * 2;

                memcpy(tmp_ports, ports, sizeof(*tmp_ports) * ports_max_cnt);
                memcpy(tmp_pals, pals, sizeof(*tmp_pals) * (1 + ports_max_cnt));
                memcpy(tmp_pal_events, pal_events, sizeof(*tmp_pal_events) * (1 + ports_max_cnt));
                memcpy(tmp_ret_events, ret_events, sizeof(*tmp_ret_events) * (1 + ports_max_cnt));

                ports_max_cnt *= 2;

                free(ports);
                free(pals);
                free(pal_events);

                ports = tmp_ports;
                pals = tmp_pals;
                pal_events = tmp_pal_events;
                ret_events = tmp_ret_events;
            }

            /* re-add this port to ports/pals/events (ports[i] pairs with pals[i + 1]) */
            ports[ports_cnt] = port;
            pals[ports_cnt + 1] = port->pal_handle;
            pal_events[ports_cnt + 1] = PAL_WAIT_READ;
            ret_events[ports_cnt + 1] = 0;
            ports_cnt++;

            debug("Listening to process %u on port %p (handle %p, type %04x)\n",
                  port->vmid & 0xFFFF, port, port->pal_handle, port->type);
        }
        unlock(&ipc_helper_lock);

        /* wait on collected ports' PAL handles + install_new_event_pal */
        PAL_BOL polled = DkStreamsWaitEvents(ports_cnt + 1, pals, pal_events, ret_events, NO_TIMEOUT);

        for (size_t i = 0; polled && i < ports_cnt + 1; i++) {
            if (ret_events[i]) {
                if (pals[i] == install_new_event_pal) {
                    /* some thread wants to install new event; this event is found in `ports`, so
                     * just re-init install_new_event */
                    debug("New IPC event was requested (port was added/removed)\n");
                    clear_event(&install_new_event);
                    continue;
                }

                /* it is not install_new_event handle, so must be one of ports */
                assert(i > 0);
                struct shim_ipc_port* polled_port = ports[i - 1];
                assert(polled_port);

                if (polled_port->type & IPC_PORT_SERVER) {
                    /* server port: accept client, create client port, and add it to port list */
                    PAL_HANDLE client = DkStreamWaitForClient(polled_port->pal_handle);
                    if (client) {
                        /* type of client port is the same as original server port but with LISTEN
                         * (for remote client) and without SERVER (doesn't wait for new clients) */
                        IDTYPE client_type = (polled_port->type & ~IPC_PORT_SERVER) | IPC_PORT_LISTEN;
                        add_ipc_port_by_id(polled_port->vmid, client, client_type, NULL, NULL);
                    } else {
                        debug("Port %p (handle %p) was removed during accepting client\n",
                              polled_port, polled_port->pal_handle);
                        del_ipc_port_fini(polled_port, -ECHILD);
                    }
                } else {
                    PAL_STREAM_ATTR attr;
                    if (DkStreamAttributesQueryByHandle(polled_port->pal_handle, &attr)) {
                        /* can read on this port, so receive messages */
                        if (attr.readable) {
                            /* NOTE: IPC helper thread does not handle failures currently */
                            receive_ipc_message(polled_port);
                        }
                        if (attr.disconnected) {
                            debug("Port %p (handle %p) disconnected\n",
                                  polled_port, polled_port->pal_handle);
                            del_ipc_port_fini(polled_port, -ECONNRESET);
                        }
                    } else {
                        debug("Port %p (handle %p) was removed during attr querying\n",
                              polled_port, polled_port->pal_handle);
                        del_ipc_port_fini(polled_port, -PAL_ERRNO);
                    }
                }
            }
        }

        /* done handling ports; put their references so they can be freed */
        for (size_t i = 0; i < ports_cnt; i++)
            put_ipc_port(ports[i]);
    }

    /* graceful exit path: release scratch arrays and this thread's resources */
    free(ports);
    free(pals);
    free(pal_events);

    __disable_preempt(self->shim_tcb);
    put_thread(self);
    debug("IPC helper thread terminated\n");

    DkThreadExit(/*clear_child_tid=*/NULL);

out_err_unlock:
    unlock(&ipc_helper_lock);
out_err:
    debug("Terminating the process due to a fatal error in ipc helper\n");
    put_thread(self);
    DkProcessExit(1);
}
/* Trampoline for the IPC helper thread: runs on the small PAL-provided stack, sets up the shim
 * TCB for this internal thread, allocates a proper stack, and switches onto it before entering
 * shim_ipc_helper(). `arg` is the shim_thread created by create_ipc_helper(). */
static void shim_ipc_helper_prepare(void* arg) {
    struct shim_thread* self = (struct shim_thread*)arg;
    if (!arg)
        return;

    shim_tcb_init();
    set_cur_thread(self);
    update_fs_base(0);
    debug_setbuf(shim_get_tcb(), true);

    /* bail out if a different helper thread was installed in the meantime */
    lock(&ipc_helper_lock);
    bool notme = (self != ipc_helper_thread);
    unlock(&ipc_helper_lock);

    void* stack = allocate_stack(IPC_HELPER_STACK_SIZE, g_pal_alloc_align, false);

    if (notme || !stack) {
        free(stack);
        put_thread(self);
        DkThreadExit(/*clear_child_tid=*/NULL);
        return;
    }
    debug("IPC helper thread started\n");

    /* swap stack to be sure we don't drain the small stack PAL provides */
    self->stack_top = stack + IPC_HELPER_STACK_SIZE;
    self->stack = stack;
    __SWITCH_STACK(self->stack_top, shim_ipc_helper, NULL);
}
  722. /* this should be called with the ipc_helper_lock held */
  723. static int create_ipc_helper(void) {
  724. assert(locked(&ipc_helper_lock));
  725. if (ipc_helper_state == HELPER_ALIVE)
  726. return 0;
  727. struct shim_thread* new = get_new_internal_thread();
  728. if (!new)
  729. return -ENOMEM;
  730. ipc_helper_thread = new;
  731. ipc_helper_state = HELPER_ALIVE;
  732. PAL_HANDLE handle = thread_create(shim_ipc_helper_prepare, new);
  733. if (!handle) {
  734. int ret = -PAL_ERRNO; /* put_thread() may overwrite errno */
  735. ipc_helper_thread = NULL;
  736. ipc_helper_state = HELPER_NOTALIVE;
  737. put_thread(new);
  738. return ret;
  739. }
  740. new->pal_handle = handle;
  741. return 0;
  742. }
  743. /* On success, the reference to ipc helper thread is returned with refcount incremented. It is the
  744. * responsibility of caller to wait for ipc helper's exit and then release the final reference to
  745. * free related resources (it is problematic for the thread itself to release its own resources e.g.
  746. * stack).
  747. */
  748. struct shim_thread* terminate_ipc_helper(void) {
  749. /* First check if thread is alive. */
  750. lock(&ipc_helper_lock);
  751. if (ipc_helper_state != HELPER_ALIVE) {
  752. unlock(&ipc_helper_lock);
  753. return NULL;
  754. }
  755. unlock(&ipc_helper_lock);
  756. /* NOTE: Graphene doesn't have an abstraction of a queue of pending signals between
  757. * communicating processes (instead all communication is done over streams). Thus, app code like
  758. * this (found in e.g. Lmbench's bw_unix):
  759. * kill(child, SIGKILL);
  760. * exit(0);
  761. * results in a data race between the SIGKILL message sent over IPC stream and the parent
  762. * process exiting. In the worst case, the parent will exit before the SIGKILL message goes
  763. * through the host-OS stream, the host OS will close the stream, and the message will never be
  764. * seen by child. To prevent such cases, we simply wait for a bit before exiting.
  765. */
  766. debug(
  767. "Waiting for 0.5s for all in-flight IPC messages to reach their destinations\n");
  768. DkThreadDelayExecution(500000); /* in microseconds */
  769. lock(&ipc_helper_lock);
  770. if (ipc_helper_state != HELPER_ALIVE) {
  771. unlock(&ipc_helper_lock);
  772. return NULL;
  773. }
  774. struct shim_thread* ret = ipc_helper_thread;
  775. if (ret)
  776. get_thread(ret);
  777. ipc_helper_state = HELPER_NOTALIVE;
  778. unlock(&ipc_helper_lock);
  779. /* force wake up of ipc helper thread so that it exits */
  780. set_event(&install_new_event, 1);
  781. return ret;
  782. }