shim_ipc.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_ipc.c
  15. *
  16. * This file contains code to maintain generic bookkeeping of IPC: operations
  17. * on shim_ipc_msg (one-way IPC messages), shim_ipc_msg_duplex (IPC messages
  18. * with acknowledgement), shim_ipc_info (IPC ports of process), shim_process.
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_utils.h>
  22. #include <shim_thread.h>
  23. #include <shim_handle.h>
  24. #include <shim_ipc.h>
  25. #include <shim_checkpoint.h>
  26. #include <shim_unistd.h>
  27. #include <shim_profile.h>
  28. #include <pal.h>
  29. #include <pal_error.h>
  30. #include <list.h>
  31. #define IPC_INFO_MGR_ALLOC 32
  32. #define PAGE_SIZE allocsize
  33. #define OBJ_TYPE struct shim_ipc_info
  34. #include "memmgr.h"
  35. static MEM_MGR ipc_info_mgr;
  36. struct shim_lock ipc_info_lock;
  37. struct shim_process cur_process;
  38. #define CLIENT_HASH_LEN 6
  39. #define CLIENT_HASH_NUM (1 << CLIENT_HASH_LEN)
  40. #define CLIENT_HASH_MASK (CLIENT_HASH_NUM - 1)
  41. #define CLIENT_HASH(vmid) ((vmid) & CLIENT_HASH_MASK)
  42. DEFINE_LISTP(shim_ipc_info);
  43. static LISTP_TYPE(shim_ipc_info) info_hlist[CLIENT_HASH_NUM];
  44. DEFINE_PROFILE_CATEGORY(ipc, );
  45. DEFINE_PROFILE_OCCURENCE(syscall_use_ipc, ipc);
  46. int init_ipc_ports(void);
  47. int init_ns_pid(void);
  48. int init_ns_sysv(void);
  49. int init_ipc(void) {
  50. int ret = 0;
  51. create_lock(&ipc_info_lock);
  52. if (!(ipc_info_mgr = create_mem_mgr(init_align_up(IPC_INFO_MGR_ALLOC))))
  53. return -ENOMEM;
  54. if ((ret = init_ipc_ports()) < 0)
  55. return ret;
  56. if ((ret = init_ns_pid()) < 0)
  57. return ret;
  58. if ((ret = init_ns_sysv()) < 0)
  59. return ret;
  60. return 0;
  61. }
  /*
   * Prepares the PID namespace leader and then the SysV namespace leader.
   *
   * Returns 0 on success, or the negative value of the first step that fails
   * (the SysV step is skipped if the PID step fails).
   */
  int prepare_ns_leaders(void) {
      int rc = prepare_pid_leader();
      if (rc < 0)
          return rc;

      rc = prepare_sysv_leader();
      if (rc < 0)
          return rc;

      return 0;
  }
  70. static struct shim_ipc_info* __create_ipc_info(IDTYPE vmid, const char* uri, size_t len) {
  71. struct shim_ipc_info* info =
  72. get_mem_obj_from_mgr_enlarge(ipc_info_mgr, size_align_up(IPC_INFO_MGR_ALLOC));
  73. if (!info)
  74. return NULL;
  75. memset(info, 0, sizeof(struct shim_ipc_info));
  76. info->vmid = vmid;
  77. if (uri)
  78. qstrsetstr(&info->uri, uri, len);
  79. REF_SET(info->ref_count, 1);
  80. INIT_LIST_HEAD(info, hlist);
  81. return info;
  82. }
  83. static void __free_ipc_info(struct shim_ipc_info* info) {
  84. if (info->pal_handle) {
  85. DkObjectClose(info->pal_handle);
  86. info->pal_handle = NULL;
  87. }
  88. if (info->port)
  89. put_ipc_port(info->port);
  90. qstrfree(&info->uri);
  91. free_mem_obj_to_mgr(ipc_info_mgr, info);
  92. }
  93. static void __get_ipc_info(struct shim_ipc_info* info) {
  94. REF_INC(info->ref_count);
  95. }
  96. static void __put_ipc_info(struct shim_ipc_info* info) {
  97. int ref_count = REF_DEC(info->ref_count);
  98. if (!ref_count)
  99. __free_ipc_info(info);
  100. }
  101. void get_ipc_info(struct shim_ipc_info* info) {
  102. /* no need to grab ipc_info_lock because __get_ipc_info() is atomic */
  103. __get_ipc_info(info);
  104. }
  105. void put_ipc_info(struct shim_ipc_info* info) {
  106. /* this is atomic so we don't grab lock in common case of ref_count > 0 */
  107. int ref_count = REF_DEC(info->ref_count);
  108. if (!ref_count) {
  109. lock(&ipc_info_lock);
  110. __free_ipc_info(info);
  111. unlock(&ipc_info_lock);
  112. }
  113. }
  114. struct shim_ipc_info* create_ipc_info(IDTYPE vmid, const char* uri, size_t len) {
  115. lock(&ipc_info_lock);
  116. struct shim_ipc_info* info = __create_ipc_info(vmid, uri, len);
  117. unlock(&ipc_info_lock);
  118. return info;
  119. }
  120. struct shim_ipc_info* create_ipc_info_in_list(IDTYPE vmid, const char* uri) {
  121. assert(vmid);
  122. struct shim_ipc_info* info;
  123. size_t len = strlen(uri);
  124. lock(&ipc_info_lock);
  125. /* check if info with this vmid and uri already exists and return it */
  126. LISTP_TYPE(shim_ipc_info)* info_bucket = &info_hlist[CLIENT_HASH(vmid)];
  127. LISTP_FOR_EACH_ENTRY(info, info_bucket, hlist)
  128. if (info->vmid == vmid && !qstrcmpstr(&info->uri, uri, len)) {
  129. get_ipc_info(info);
  130. unlock(&ipc_info_lock);
  131. return info;
  132. }
  133. /* otherwise create new info and return it */
  134. info = __create_ipc_info(vmid, uri, len);
  135. if (info) {
  136. LISTP_ADD(info, info_bucket, hlist);
  137. get_ipc_info(info);
  138. }
  139. unlock(&ipc_info_lock);
  140. return info;
  141. }
  142. void put_ipc_info_in_list(struct shim_ipc_info* info) {
  143. LISTP_TYPE(shim_ipc_info)* info_bucket = &info_hlist[CLIENT_HASH(info->vmid)];
  144. lock(&ipc_info_lock);
  145. __put_ipc_info(info);
  146. if (REF_GET(info->ref_count) == 1) {
  147. LISTP_DEL_INIT(info, info_bucket, hlist);
  148. __put_ipc_info(info);
  149. }
  150. unlock(&ipc_info_lock);
  151. }
  152. struct shim_ipc_info* lookup_ipc_info(IDTYPE vmid) {
  153. assert(vmid);
  154. lock(&ipc_info_lock);
  155. struct shim_ipc_info* info;
  156. LISTP_TYPE(shim_ipc_info)* info_bucket = &info_hlist[CLIENT_HASH(vmid)];
  157. LISTP_FOR_EACH_ENTRY(info, info_bucket, hlist)
  158. if (info->vmid == vmid && !qstrempty(&info->uri)) {
  159. __get_ipc_info(info);
  160. unlock(&ipc_info_lock);
  161. return info;
  162. }
  163. unlock(&ipc_info_lock);
  164. return NULL;
  165. }
  166. struct shim_process* create_process(void) {
  167. struct shim_process* new_process = calloc(1, sizeof(struct shim_process));
  168. if (!new_process)
  169. return NULL;
  170. new_process->parent = create_ipc_info(cur_process.vmid, NULL, 0);
  171. lock(&cur_process.lock);
  172. if (cur_process.self)
  173. qstrcopy(&new_process->parent->uri, &cur_process.self->uri);
  174. for (int i = 0; i < TOTAL_NS; i++) {
  175. if (cur_process.ns[i])
  176. new_process->ns[i] = create_ipc_info(cur_process.ns[i]->vmid,
  177. qstrgetstr(&cur_process.ns[i]->uri),
  178. cur_process.ns[i]->uri.len);
  179. }
  180. unlock(&cur_process.lock);
  181. return new_process;
  182. }
  183. void free_process(struct shim_process* process) {
  184. if (process->self)
  185. put_ipc_info(process->self);
  186. if (process->parent)
  187. put_ipc_info(process->parent);
  188. for (int i = 0; i < TOTAL_NS; i++)
  189. if (process->ns[i])
  190. put_ipc_info(process->ns[i]);
  191. free(process);
  192. }
  193. void init_ipc_msg(struct shim_ipc_msg* msg, int code, size_t size, IDTYPE dest) {
  194. msg->code = code;
  195. msg->size = get_ipc_msg_size(size);
  196. msg->src = cur_process.vmid;
  197. msg->dst = dest;
  198. msg->seq = 0;
  199. }
  200. void init_ipc_msg_duplex(struct shim_ipc_msg_duplex* msg, int code, size_t size, IDTYPE dest) {
  201. init_ipc_msg(&msg->msg, code, size, dest);
  202. msg->thread = NULL;
  203. INIT_LIST_HEAD(msg, list);
  204. msg->retval = 0;
  205. msg->private = NULL;
  206. }
  207. int send_ipc_message(struct shim_ipc_msg* msg, struct shim_ipc_port* port) {
  208. assert(msg->size >= IPC_MSG_MINIMAL_SIZE);
  209. msg->src = cur_process.vmid;
  210. debug("Sending ipc message to port %p (handle %p)\n", port, port->pal_handle);
  211. size_t total_bytes = msg->size;
  212. size_t bytes = 0;
  213. do {
  214. size_t ret = DkStreamWrite(port->pal_handle, 0, total_bytes - bytes,
  215. (void *)msg + bytes, NULL);
  216. if (!ret) {
  217. if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN || PAL_ERRNO == EWOULDBLOCK)
  218. continue;
  219. debug("Port %p (handle %p) was removed during sending\n", port, port->pal_handle);
  220. del_ipc_port_fini(port, -ECHILD);
  221. return -PAL_ERRNO;
  222. }
  223. bytes += ret;
  224. } while (bytes < total_bytes);
  225. return 0;
  226. }
  227. struct shim_ipc_msg_duplex* pop_ipc_msg_duplex(struct shim_ipc_port* port, unsigned long seq) {
  228. struct shim_ipc_msg_duplex* found = NULL;
  229. lock(&port->msgs_lock);
  230. struct shim_ipc_msg_duplex* tmp;
  231. LISTP_FOR_EACH_ENTRY(tmp, &port->msgs, list)
  232. if (tmp->msg.seq == seq) {
  233. found = tmp;
  234. LISTP_DEL_INIT(tmp, &port->msgs, list);
  235. break;
  236. }
  237. unlock(&port->msgs_lock);
  238. return found;
  239. }
  240. int send_ipc_message_duplex(struct shim_ipc_msg_duplex* msg, struct shim_ipc_port* port,
  241. unsigned long* seq, void* private_data) {
  242. int ret = 0;
  243. struct shim_thread * thread = get_cur_thread();
  244. assert(thread);
  245. /* prepare thread which sends the message for waiting for response
  246. * (this also acquires reference to the thread) */
  247. if (!msg->thread)
  248. thread_setwait(&msg->thread, thread);
  249. static struct atomic_int ipc_seq_counter;
  250. msg->msg.seq = atomic_inc_return(&ipc_seq_counter);
  251. /* save the message to list of port msgs together with its private data */
  252. lock(&port->msgs_lock);
  253. msg->private = private_data;
  254. LISTP_ADD_TAIL(msg, &port->msgs, list);
  255. unlock(&port->msgs_lock);
  256. ret = send_ipc_message(&msg->msg, port);
  257. if (ret < 0)
  258. goto out;
  259. if (seq)
  260. *seq = msg->msg.seq;
  261. debug("Start waiting for response (seq = %lu)\n", msg->msg.seq);
  262. /* force thread which sends the message to wait for response;
  263. * ignore unrelated interrupts but fail on actual errors */
  264. do {
  265. ret = thread_sleep(NO_TIMEOUT);
  266. if (ret < 0 && ret != -EINTR && ret != -EAGAIN)
  267. goto out;
  268. } while (ret != 0);
  269. debug("Finished waiting for response (seq = %lu, ret = %d)\n",
  270. msg->msg.seq, msg->retval);
  271. ret = msg->retval;
  272. out:
  273. lock(&port->msgs_lock);
  274. if (!LIST_EMPTY(msg, list))
  275. LISTP_DEL_INIT(msg, &port->msgs, list);
  276. unlock(&port->msgs_lock);
  277. if (msg->thread) {
  278. /* put reference to the thread acquired earlier */
  279. put_thread(msg->thread);
  280. msg->thread = NULL;
  281. }
  282. return ret;
  283. }
  284. /* must be called with cur_process.lock taken */
  285. struct shim_ipc_info* create_ipc_info_cur_process(void) {
  286. struct shim_ipc_info* info = create_ipc_info(cur_process.vmid, NULL, 0);
  287. if (!info)
  288. return NULL;
  289. char uri[PIPE_URI_SIZE];
  290. if (create_pipe(NULL, uri, PIPE_URI_SIZE, &info->pal_handle,
  291. &info->uri) < 0) {
  292. put_ipc_info(info);
  293. return NULL;
  294. }
  295. add_ipc_port_by_id(0, info->pal_handle, IPC_PORT_SERVER,
  296. NULL, &info->port);
  297. return info;
  298. }
  299. int get_ipc_info_cur_process(struct shim_ipc_info** info) {
  300. lock(&cur_process.lock);
  301. if (!cur_process.self) {
  302. cur_process.self = create_ipc_info_cur_process();
  303. if (!cur_process.self) {
  304. unlock(&cur_process.lock);
  305. return -EACCES;
  306. }
  307. }
  308. get_ipc_info(cur_process.self);
  309. *info = cur_process.self;
  310. unlock(&cur_process.lock);
  311. return 0;
  312. }
  313. DEFINE_PROFILE_INTERVAL(ipc_checkpoint_send, ipc);
  314. DEFINE_PROFILE_INTERVAL(ipc_checkpoint_callback, ipc);
  315. /* Graphene's checkpoint() syscall broadcasts a msg to all processes
  316. * asking to checkpoint their state and save in process-unique file in
  317. * directory cpdir under session cpsession. */
  318. int ipc_checkpoint_send(const char* cpdir, IDTYPE cpsession) {
  319. BEGIN_PROFILE_INTERVAL();
  320. int ret;
  321. size_t len = strlen(cpdir);
  322. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_checkpoint) + len);
  323. struct shim_ipc_msg* msg = __alloca(total_msg_size);
  324. init_ipc_msg(msg, IPC_CHECKPOINT, total_msg_size, 0);
  325. struct shim_ipc_checkpoint* msgin = (struct shim_ipc_checkpoint *) &msg->msg;
  326. msgin->cpsession = cpsession;
  327. memcpy(&msgin->cpdir, cpdir, len + 1);
  328. debug("IPC broadcast to all: IPC_CHECKPOINT(%u, %s)\n", cpsession, cpdir);
  329. /* broadcast to all including myself (so I can also checkpoint) */
  330. ret = broadcast_ipc(msg, IPC_PORT_DIRCLD|IPC_PORT_DIRPRT, /*exclude_port*/ NULL);
  331. SAVE_PROFILE_INTERVAL(ipc_checkpoint_send);
  332. return ret;
  333. }
  334. /* This process is asked to create a checkpoint, so it:
  335. * - sends a Graphene-specific SIGCP signal to all its threads (for
  336. * all to stop and join the checkpoint for consistent state),
  337. * - broadcasts checkpoint msg further to other processes. */
  338. int ipc_checkpoint_callback(struct shim_ipc_msg* msg, struct shim_ipc_port* port) {
  339. BEGIN_PROFILE_INTERVAL();
  340. int ret = 0;
  341. struct shim_ipc_checkpoint* msgin = (struct shim_ipc_checkpoint *) msg->msg;
  342. debug("IPC callback from %u: IPC_CHECKPOINT(%u, %s)\n",
  343. msg->src, msgin->cpsession, msgin->cpdir);
  344. ret = create_checkpoint(msgin->cpdir, &msgin->cpsession);
  345. if (ret < 0)
  346. goto out;
  347. kill_all_threads(NULL, msgin->cpsession, SIGCP);
  348. broadcast_ipc(msg, IPC_PORT_DIRCLD|IPC_PORT_DIRPRT, port);
  349. out:
  350. SAVE_PROFILE_INTERVAL(ipc_checkpoint_callback);
  351. return ret;
  352. }
  353. BEGIN_CP_FUNC(ipc_info) {
  354. assert(size == sizeof(struct shim_ipc_info));
  355. struct shim_ipc_info* info = (struct shim_ipc_info *) obj;
  356. struct shim_ipc_info* new_info = NULL;
  357. ptr_t off = GET_FROM_CP_MAP(obj);
  358. if (!off) {
  359. off = ADD_CP_OFFSET(sizeof(struct shim_ipc_info));
  360. ADD_TO_CP_MAP(obj, off);
  361. new_info = (struct shim_ipc_info *) (base + off);
  362. memcpy(new_info, info, sizeof(struct shim_ipc_info));
  363. REF_SET(new_info->ref_count, 0);
  364. /* call qstr-specific checkpointing function for new_info->uri */
  365. DO_CP_IN_MEMBER(qstr, new_info, uri);
  366. if (info->pal_handle && info->pal_handle != IPC_FORCE_RECONNECT) {
  367. struct shim_palhdl_entry* entry;
  368. /* call palhdl-specific checkpointing function to checkpoint
  369. * info->pal_handle and return created object in entry */
  370. DO_CP(palhdl, info->pal_handle, &entry);
  371. /* info's PAL handle will be re-opened with new URI during
  372. * palhdl restore (see checkpoint.c) */
  373. entry->uri = &new_info->uri;
  374. entry->phandle = &new_info->pal_handle;
  375. }
  376. } else {
  377. /* already checkpointed */
  378. new_info = (struct shim_ipc_info *) (base + off);
  379. }
  380. if (new_info && objp)
  381. *objp = (void *) new_info;
  382. }
  383. END_CP_FUNC_NO_RS(ipc_info)
  384. BEGIN_CP_FUNC(process) {
  385. assert(size == sizeof(struct shim_process));
  386. struct shim_process* process = (struct shim_process *) obj;
  387. struct shim_process* new_process = NULL;
  388. ptr_t off = GET_FROM_CP_MAP(obj);
  389. if (!off) {
  390. off = ADD_CP_OFFSET(sizeof(struct shim_process));
  391. ADD_TO_CP_MAP(obj, off);
  392. new_process = (struct shim_process *) (base + off);
  393. memcpy(new_process, process, sizeof(struct shim_process));
  394. /* call ipc_info-specific checkpointing functions
  395. * for new_process's self, parent, and ns infos */
  396. if (process->self)
  397. DO_CP_MEMBER(ipc_info, process, new_process, self);
  398. if (process->parent)
  399. DO_CP_MEMBER(ipc_info, process, new_process, parent);
  400. for (int i = 0; i < TOTAL_NS; i++)
  401. if (process->ns[i])
  402. DO_CP_MEMBER(ipc_info, process, new_process, ns[i]);
  403. ADD_CP_FUNC_ENTRY(off);
  404. } else {
  405. /* already checkpointed */
  406. new_process = (struct shim_process *) (base + off);
  407. }
  408. if (objp)
  409. *objp = (void *) new_process;
  410. }
  411. END_CP_FUNC(process)
  412. BEGIN_RS_FUNC(process) {
  413. __UNUSED(offset);
  414. struct shim_process* process = (void *) (base + GET_CP_FUNC_ENTRY());
  415. CP_REBASE(process->self);
  416. CP_REBASE(process->parent);
  417. CP_REBASE(process->ns);
  418. if (process->self) {
  419. process->self->vmid = cur_process.vmid;
  420. get_ipc_info(process->self);
  421. }
  422. if (process->parent)
  423. get_ipc_info(process->parent);
  424. for (int i = 0; i < TOTAL_NS; i++)
  425. if (process->ns[i])
  426. get_ipc_info(process->ns[i]);
  427. process->vmid = cur_process.vmid;
  428. memcpy(&cur_process, process, sizeof(struct shim_process));
  429. create_lock(&cur_process.lock);
  430. DEBUG_RS("vmid=%u,uri=%s,parent=%u(%s)", process->vmid,
  431. process->self ? qstrgetstr(&process->self->uri) : "",
  432. process->parent ? process->parent->vmid : 0,
  433. process->parent ? qstrgetstr(&process->parent->uri) : "");
  434. }
  435. END_RS_FUNC(process)