shim_checkpoint.c

/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */

/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */
/*
 * shim_checkpoint.c
 *
 * This file contains the code for the checkpoint / migration scheme of the
 * library OS.
 */
#include <shim_internal.h>
#include <shim_utils.h>
#include <shim_thread.h>
#include <shim_handle.h>
#include <shim_vma.h>
#include <shim_fs.h>
#include <shim_checkpoint.h>
#include <shim_ipc.h>
#include <shim_profile.h>

#include <pal.h>
#include <pal_error.h>
#include <list.h>

#include <stdarg.h>
#include <asm/fcntl.h>
#include <asm/mman.h>

DEFINE_PROFILE_CATAGORY(migrate, );

DEFINE_PROFILE_CATAGORY(checkpoint, migrate);
DEFINE_PROFILE_INTERVAL(checkpoint_create_map, checkpoint);
DEFINE_PROFILE_INTERVAL(checkpoint_copy, checkpoint);
DEFINE_PROFILE_CATAGORY(checkpoint_func, checkpoint);
DEFINE_PROFILE_INTERVAL(checkpoint_destroy_map, checkpoint);

DEFINE_PROFILE_OCCURENCE(checkpoint_count, checkpoint);
DEFINE_PROFILE_OCCURENCE(checkpoint_total_size, checkpoint);

DEFINE_PROFILE_CATAGORY(resume, migrate);
DEFINE_PROFILE_INTERVAL(child_created_in_new_process, resume);
DEFINE_PROFILE_INTERVAL(child_wait_header, resume);
DEFINE_PROFILE_INTERVAL(child_receive_header, resume);
DEFINE_PROFILE_INTERVAL(do_migration, resume);
DEFINE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc, resume);
DEFINE_PROFILE_INTERVAL(child_load_memory_by_gipc, resume);
DEFINE_PROFILE_INTERVAL(child_load_checkpoint_on_pipe, resume);
DEFINE_PROFILE_INTERVAL(child_receive_handles, resume);
DEFINE_PROFILE_INTERVAL(restore_checkpoint, resume);
DEFINE_PROFILE_CATAGORY(resume_func, resume);
DEFINE_PROFILE_INTERVAL(child_total_migration_time, resume);
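
/*
 * The checkpoint map tracks which objects have already been copied into the
 * checkpoint store. It is a simple chained hash table keyed by the object's
 * original address; each entry records the offset of the copied object
 * inside the store, so callers can reuse that offset instead of copying the
 * same object twice.
 */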
#define CP_HASH_SIZE    256
#define CP_HASH(addr) ((hashfunc((ptr_t)(addr))) & (CP_HASH_SIZE - 1))

typedef uint16_t FASTHASHTYPE;

#define CP_MAP_ENTRY_NUM 64

DEFINE_LIST(cp_map_entry);
struct cp_map_entry
{
    LIST_TYPE(cp_map_entry) hlist;
    struct shim_cp_map_entry entry;
};

DEFINE_LISTP(cp_map_entry);
struct cp_map {
    struct cp_map_buffer {
        struct cp_map_buffer * next;
        int num, cnt;
        struct cp_map_entry entries[0];
    } * buffers;

    struct hash_map {
        LISTP_TYPE(cp_map_entry) head[CP_HASH_SIZE];
    } map;
};

void * create_cp_map (void)
{
    void * data = malloc(sizeof(struct cp_map) + sizeof(struct cp_map_buffer) +
                         sizeof(struct cp_map_entry) * CP_MAP_ENTRY_NUM);
    if (!data)
        return NULL;

    struct cp_map * map = (struct cp_map *) data;
    struct cp_map_buffer * buffer =
            (struct cp_map_buffer *) (data + sizeof(struct cp_map));

    memset(map, 0, sizeof(*map));
    map->buffers = buffer;
    buffer->next = NULL;
    buffer->num = CP_MAP_ENTRY_NUM;
    buffer->cnt = 0;

    return (void *) map;
}

void destroy_cp_map (void * map)
{
    struct cp_map * m = (struct cp_map *) map;
    struct cp_map_buffer * buffer = m->buffers, * next;

    for (next = buffer ? buffer->next : NULL ;
         buffer && next ;
         buffer = next, next = next ? next->next : NULL)
        free(buffer);

    free(m);
}

static inline
struct cp_map_buffer * extend_cp_map (struct cp_map * map)
{
    struct cp_map_buffer * buffer =
            malloc(sizeof(struct cp_map_buffer) +
                   sizeof(struct cp_map_entry) * CP_MAP_ENTRY_NUM);
    if (!buffer)
        return NULL;

    buffer->next = map->buffers;
    map->buffers = buffer;
    buffer->num = CP_MAP_ENTRY_NUM;
    buffer->cnt = 0;

    return buffer;
}

struct shim_cp_map_entry *
get_cp_map_entry (void * map, void * addr, bool create)
{
    struct cp_map * m = (struct cp_map *) map;
    FASTHASHTYPE hash = CP_HASH(addr);
    LISTP_TYPE(cp_map_entry) * head = &m->map.head[hash];
    struct cp_map_entry * tmp;
    struct shim_cp_map_entry * e = NULL;

    listp_for_each_entry(tmp, head, hlist)
        if (tmp->entry.addr == addr)
            e = &tmp->entry;

    if (create && !e) {
        struct cp_map_buffer * buffer = m->buffers;

        if (buffer->cnt == buffer->num)
            buffer = extend_cp_map(m);

        struct cp_map_entry *new = &buffer->entries[buffer->cnt++];
        INIT_LIST_HEAD(new, hlist);
        listp_add(new, head, hlist);

        new->entry.addr = addr;
        new->entry.off = 0;
        e = &new->entry;
    }

    return e;
}
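
/*
 * BEGIN_CP_FUNC / BEGIN_RS_FUNC (see shim_checkpoint.h) expand into per-type
 * checkpoint and restore functions. A CP function copies an object into the
 * checkpoint store; the matching RS function fixes the object up (rebasing
 * pointers, reopening handles) after the checkpoint is mapped in the child.
 * The "memory" entry below does not copy data eagerly; it only records an
 * address range to be sent out of band when the checkpoint is transmitted.
 */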
BEGIN_CP_FUNC(memory)
{
    struct shim_mem_entry * entry =
            (void *) (base + ADD_CP_OFFSET(sizeof(struct shim_mem_entry)));

    entry->addr = obj;
    entry->size = size;
    entry->paddr = NULL;
    entry->prot = PAL_PROT_READ|PAL_PROT_WRITE;
    entry->data = NULL;
    entry->prev = store->last_mem_entry;
    store->last_mem_entry = entry;
    store->mem_nentries++;
    store->mem_size += size;

    if (objp)
        *objp = entry;
}
END_CP_FUNC_NO_RS(memory)

BEGIN_CP_FUNC(palhdl)
{
    ptr_t off = ADD_CP_OFFSET(sizeof(struct shim_palhdl_entry));
    struct shim_palhdl_entry * entry = (void *) (base + off);

    entry->handle = (PAL_HANDLE) obj;
    entry->uri = NULL;
    entry->phandle = NULL;
    entry->prev = store->last_palhdl_entry;
    store->last_palhdl_entry = entry;
    store->palhdl_nentries++;

    ADD_CP_FUNC_ENTRY(off);

    if (objp)
        *objp = entry;
}
END_CP_FUNC(palhdl)

BEGIN_RS_FUNC(palhdl)
{
    struct shim_palhdl_entry * ent = (void *) (base + GET_CP_FUNC_ENTRY());
    if (ent->phandle && !*ent->phandle && ent->uri) {
        /* XXX: reopen the stream */
    }
}
END_RS_FUNC(palhdl)

BEGIN_CP_FUNC(migratable)
{
    struct shim_mem_entry * mem_entry;

    DO_CP_SIZE(memory, &__migratable, &__migratable_end - &__migratable,
               &mem_entry);

    struct shim_cp_entry * entry = ADD_CP_FUNC_ENTRY(0UL);
    mem_entry->paddr = (void **) &entry->cp_un.cp_val;
}
END_CP_FUNC(migratable)

BEGIN_RS_FUNC(migratable)
{
    void * data = (void *) GET_CP_FUNC_ENTRY();
    CP_REBASE(data);

    memcpy(&__migratable, data, &__migratable_end - &__migratable);
}
END_RS_FUNC(migratable)
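
/*
 * Checkpoint the environment array: both the envp pointer array and the
 * strings it points to are copied into the store; the pointers are rebased
 * on restore and installed as initial_envp.
 */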
BEGIN_CP_FUNC(environ)
{
    const char ** e, ** envp = (void *) obj;
    int nenvp = 0;
    int envp_bytes = 0;

    for (e = envp ; *e ; e++) {
        nenvp++;
        envp_bytes += strlen(*e) + 1;
    }

    ptr_t off = ADD_CP_OFFSET(sizeof(char *) * (nenvp + 1) + envp_bytes);
    const char ** new_envp = (void *) base + off;
    char * ptr = (void *) base + off + sizeof(char *) * (nenvp + 1);

    for (int i = 0 ; i < nenvp ; i++) {
        int len = strlen(envp[i]);
        new_envp[i] = ptr;
        memcpy(ptr, envp[i], len + 1);
        ptr += len + 1;
    }

    new_envp[nenvp] = NULL;

    ADD_CP_FUNC_ENTRY(off);
}
END_CP_FUNC(environ)

BEGIN_RS_FUNC(environ)
{
    const char ** envp = (void *) base + GET_CP_FUNC_ENTRY();
    const char ** e;

    for (e = envp ; *e ; e++) {
        CP_REBASE(*e);
        DEBUG_RS("%s", *e);
    }

    initial_envp = envp;
}
END_RS_FUNC(environ)
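
/*
 * Checkpoint a qstr (a string type that keeps short values inline and spills
 * long ones to a separately allocated shim_str). Short strings are folded
 * back into the inline buffer; long strings get a copy in the store and the
 * oflow pointer is rebased on restore.
 */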
BEGIN_CP_FUNC(qstr)
{
    struct shim_qstr * qstr = (struct shim_qstr *) obj;

    if (qstr->len < QSTR_SIZE) {
        if (qstr->oflow) {
            memcpy(qstr->name, qstr->oflow, qstr->len + 1);
            qstr->oflow = NULL;
        }
    } else {
        struct shim_str * str =
                (void *) (base + ADD_CP_OFFSET(qstr->len + 1));
        memcpy(str, qstr->oflow, qstr->len + 1);
        qstr->oflow = str;
        ADD_CP_FUNC_ENTRY((ptr_t) qstr - base);
    }
}
END_CP_FUNC(qstr)

BEGIN_RS_FUNC(qstr)
{
    struct shim_qstr * qstr = (void *) (base + GET_CP_FUNC_ENTRY());
    CP_REBASE(qstr->oflow);
}
END_RS_FUNC(qstr)
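
/*
 * A gipc entry records a page-aligned memory range to be transferred through
 * the GIPC physical memory channel rather than copied into the store. With
 * HASH_GIPC enabled, an MD5 digest of the first page is kept so the restore
 * path can sanity-check the received memory.
 */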
BEGIN_CP_FUNC(gipc)
{
    ptr_t off = ADD_CP_OFFSET(sizeof(struct shim_gipc_entry));

    void * send_addr = (void *) ALIGN_DOWN(obj);
    size_t send_size = (void *) ALIGN_UP(obj + size) - send_addr;

    struct shim_gipc_entry * entry = (void *) (base + off);

    entry->mem.addr = send_addr;
    entry->mem.size = send_size;
    entry->mem.prot = PAL_PROT_READ|PAL_PROT_WRITE;
    entry->mem.prev = (void *) store->last_gipc_entry;
    store->last_gipc_entry = entry;
    store->gipc_nentries++;

#if HASH_GIPC == 1
    struct md5_ctx ctx;
    md5_init(&ctx);
    md5_update(&ctx, send_addr, allocsize);
    md5_final(&ctx);
    entry->first_hash = *(unsigned long *) ctx.digest;
#endif /* HASH_GIPC == 1 */

    ADD_CP_FUNC_ENTRY(off);

    if (objp)
        *objp = entry;
}
END_CP_FUNC(gipc)

BEGIN_RS_FUNC(gipc)
{
#if HASH_GIPC == 1
    struct shim_gipc_entry * entry = (void *) (base + GET_CP_FUNC_ENTRY());

    PAL_FLG pal_prot = PAL_PROT(entry->prot, 0);
    if (!(pal_prot & PAL_PROT_READ))
        DkVirtualMemoryProtect(entry->addr, entry->npages * allocsize,
                               pal_prot|PAL_PROT_READ);

    struct md5_ctx ctx;
    md5_init(&ctx);
    md5_update(&ctx, entry->addr, allocsize);
    md5_final(&ctx);
    assert(*(unsigned long *) ctx.digest == entry->first_hash);

    if (!(pal_prot & PAL_PROT_READ))
        DkVirtualMemoryProtect(entry->addr, entry->npages * allocsize,
                               pal_prot);
#endif /* HASH_GIPC == 1 */
}
END_RS_FUNC(gipc)
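
/*
 * Send the checkpoint through a GIPC store: out-of-band memory entries are
 * first copied to the end of the checkpoint area, then the whole area and
 * the recorded gipc ranges are committed to the physical memory channel
 * with DkPhysicalMemoryCommit.
 */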
static int send_checkpoint_by_gipc (PAL_HANDLE gipc_store,
                                    struct shim_cp_store * store)
{
    PAL_PTR hdr_addr = (PAL_PTR) store->base;
    PAL_NUM hdr_size = (PAL_NUM) store->offset + store->mem_size;
    assert(ALIGNED(hdr_addr));

    int mem_nentries = store->mem_nentries;

    if (mem_nentries) {
        struct shim_mem_entry ** mem_entries =
                __alloca(sizeof(struct shim_mem_entry *) * mem_nentries);
        int mem_cnt = mem_nentries;
        struct shim_mem_entry * mem_ent = store->last_mem_entry;

        for (; mem_ent ; mem_ent = mem_ent->prev) {
            if (!mem_cnt)
                return -EINVAL;
            mem_entries[--mem_cnt] = mem_ent;
        }

        mem_entries += mem_cnt;
        mem_nentries -= mem_cnt;

        for (int i = 0 ; i < mem_nentries ; i++) {
            void * mem_addr = (void *) store->base +
                              __ADD_CP_OFFSET(mem_entries[i]->size);

            assert(store->offset <= hdr_size);
            memcpy(mem_addr, mem_entries[i]->addr, mem_entries[i]->size);
            mem_entries[i]->data = mem_addr;
        }
    }

    hdr_size = ALIGN_UP(hdr_size);
    int npages = DkPhysicalMemoryCommit(gipc_store, 1, &hdr_addr, &hdr_size);
    if (!npages)
        return -EPERM;

    int nentries = store->gipc_nentries;
    PAL_PTR * gipc_addrs = __alloca(sizeof(PAL_PTR) * nentries);
    PAL_NUM * gipc_sizes = __alloca(sizeof(PAL_NUM) * nentries);
    int total_pages = 0;
    int cnt = nentries;
    struct shim_gipc_entry * ent = store->last_gipc_entry;

    for (; ent ; ent = (void *) ent->mem.prev) {
        if (!cnt)
            return -EINVAL;
        cnt--;
        gipc_addrs[cnt] = ent->mem.addr;
        gipc_sizes[cnt] = ent->mem.size;
        total_pages += ent->mem.size / allocsize;
    }

    gipc_addrs += cnt;
    gipc_sizes += cnt;
    nentries -= cnt;

    /* Chia-Che: sending an empty page can't ever be a smart idea.
       we might rather fail here */
    npages = DkPhysicalMemoryCommit(gipc_store, nentries, gipc_addrs,
                                    gipc_sizes);

    if (npages < total_pages) {
        debug("gipc supposed to send %d pages, but only %d pages sent\n",
              total_pages, npages);
        return -ENOMEM;
    }

    ADD_PROFILE_OCCURENCE(migrate_send_gipc_pages, npages);
    return 0;
}
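
/*
 * Fallback path without GIPC: write the checkpoint area and then each
 * recorded memory range over the RPC stream, retrying on EINTR/EAGAIN.
 */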
static int send_checkpoint_on_stream (PAL_HANDLE stream,
                                      struct shim_cp_store * store)
{
    int mem_nentries = store->mem_nentries;
    struct shim_mem_entry ** mem_entries;

    if (mem_nentries) {
        mem_entries = __alloca(sizeof(struct shim_mem_entry *) * mem_nentries);
        int mem_cnt = mem_nentries;
        struct shim_mem_entry * mem_ent = store->last_mem_entry;

        for (; mem_ent ; mem_ent = mem_ent->prev) {
            if (!mem_cnt)
                return -EINVAL;
            mem_entries[--mem_cnt] = mem_ent;
        }

        void * mem_addr = (void *) store->base + store->offset;
        mem_entries += mem_cnt;
        mem_nentries -= mem_cnt;

        for (int i = 0 ; i < mem_nentries ; i++) {
            int mem_size = mem_entries[i]->size;
            mem_entries[i]->data = mem_addr;
            mem_addr += mem_size;
        }
    }

    size_t total_bytes = store->offset;
    size_t bytes = 0;

    do {
        size_t ret = DkStreamWrite(stream, 0, total_bytes - bytes,
                                   (void *) store->base + bytes, NULL);

        if (!ret) {
            if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                PAL_ERRNO == EWOULDBLOCK)
                continue;
            return -PAL_ERRNO;
        }

        bytes += ret;
    } while (bytes < total_bytes);

    ADD_PROFILE_OCCURENCE(migrate_send_on_stream, total_bytes);

    for (int i = 0 ; i < mem_nentries ; i++) {
        size_t mem_size = mem_entries[i]->size;
        void * mem_addr = mem_entries[i]->addr;

        bytes = 0;
        do {
            size_t ret = DkStreamWrite(stream, 0, mem_size - bytes,
                                       mem_addr + bytes, NULL);
            if (!ret) {
                if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                    PAL_ERRNO == EWOULDBLOCK)
                    continue;
                return -PAL_ERRNO;
            }

            bytes += ret;
        } while (bytes < mem_entries[i]->size);

        if (!(mem_entries[i]->prot & PAL_PROT_READ))
            DkVirtualMemoryProtect(mem_addr, mem_size, mem_entries[i]->prot);

        mem_entries[i]->size = mem_size;
        ADD_PROFILE_OCCURENCE(migrate_send_on_stream, mem_size);
    }

    return 0;
}
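
/*
 * In the child, map the memory ranges committed by the parent's GIPC store
 * back in with DkPhysicalMemoryMap: each range is mapped either at its
 * original address or, if a placeholder pointer (paddr) was recorded,
 * wherever the host returns, with the resulting address written back
 * through that pointer.
 */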
static int restore_gipc (PAL_HANDLE gipc, struct gipc_header * hdr, ptr_t base,
                         long rebase)
{
    struct shim_gipc_entry * gipc_entries = (void *) (base + hdr->entoffset);
    int nentries = hdr->nentries;

    if (!nentries)
        return 0;

    debug("restore memory by gipc: %d entries\n", nentries);

    struct shim_gipc_entry ** entries =
            __alloca(sizeof(struct shim_gipc_entry *) * nentries);

    struct shim_gipc_entry * entry = gipc_entries;
    int cnt = nentries;

    while (entry) {
        CP_REBASE(entry->mem.prev);
        CP_REBASE(entry->mem.paddr);
        if (!cnt)
            return -EINVAL;
        entries[--cnt] = entry;
        entry = (void *) entry->mem.prev;
    }

    entries += cnt;
    nentries -= cnt;

    PAL_PTR * addrs = __alloca(sizeof(PAL_PTR) * nentries);
    PAL_NUM * sizes = __alloca(sizeof(PAL_NUM) * nentries);
    PAL_FLG * prots = __alloca(sizeof(PAL_FLG) * nentries);

    for (int i = 0 ; i < nentries ; i++) {
        addrs[i] = entries[i]->mem.paddr ? NULL : (PAL_PTR) entries[i]->mem.addr;
        sizes[i] = entries[i]->mem.size;
        prots[i] = entries[i]->mem.prot;
    }

    if (!DkPhysicalMemoryMap(gipc, nentries, addrs, sizes, prots))
        return -PAL_ERRNO;

    for (int i = 0 ; i < nentries ; i++)
        if (entries[i]->mem.paddr)
            *(void **) entries[i]->mem.paddr = (void *) addrs[i];

    return 0;
}
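
/*
 * Walk a received checkpoint: first re-create and fill the out-of-band
 * memory entries, then iterate over the checkpoint entries and dispatch
 * each one to its restore (RS) function through the __rs_func table.
 */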
int restore_checkpoint (struct cp_header * cphdr, struct mem_header * memhdr,
                        ptr_t base, int type)
{
    ptr_t cpoffset = cphdr->offset;
    ptr_t * offset = &cpoffset;
    long rebase = base - (ptr_t) cphdr->addr;
    int ret = 0;

    if (type)
        debug("restore checkpoint at 0x%08lx rebased from %p (%s only)\n",
              base, cphdr->addr, CP_FUNC_NAME(type));
    else
        debug("restore checkpoint at 0x%08lx rebased from %p\n",
              base, cphdr->addr);

    if (memhdr && memhdr->nentries) {
        struct shim_mem_entry * entry =
                (void *) (base + memhdr->entoffset);

        for (; entry ; entry = entry->prev) {
            CP_REBASE(entry->prev);
            CP_REBASE(entry->paddr);

            if (entry->paddr) {
                *entry->paddr = entry->data;
            } else {
                debug("memory entry [%p]: %p-%p\n", entry, entry->addr,
                      entry->addr + entry->size);

                PAL_PTR addr = ALIGN_DOWN(entry->addr);
                PAL_NUM size = ALIGN_UP(entry->addr + entry->size) -
                               (void *) addr;
                PAL_FLG prot = entry->prot;

                if (!DkVirtualMemoryAlloc(addr, size, 0, prot|PAL_PROT_WRITE)) {
                    debug("failed allocating %p-%p\n", addr, addr + size);
                    return -PAL_ERRNO;
                }

                CP_REBASE(entry->data);
                memcpy(entry->addr, entry->data, entry->size);

                if (!(entry->prot & PAL_PROT_WRITE) &&
                    !DkVirtualMemoryProtect(addr, size, prot)) {
                    debug("failed protecting %p-%p (ignored)\n", addr, addr + size);
                }
            }
        }
    }

    struct shim_cp_entry * cpent = NEXT_CP_ENTRY();

    while (cpent) {
        if (cpent->cp_type < CP_FUNC_BASE)
            goto next;
        if (type && cpent->cp_type != type)
            goto next;

        rs_func rs = (&__rs_func) [cpent->cp_type - CP_FUNC_BASE];
        ret = (*rs) (cpent, base, offset, rebase);
        if (ret < 0) {
            sys_printf("restore_checkpoint() at %s (%d)\n",
                       CP_FUNC_NAME(cpent->cp_type), ret);
            return ret;
        }
next:
        cpent = NEXT_CP_ENTRY();
    }
  508. debug("successfully restore checkpoint loaded at 0x%08lx - 0x%08lx\n",
  509. base, base + cphdr->size);
  510. return 0;
  511. }
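
/*
 * Resume from a directory of checkpoint files. This path is unfinished
 * (note the assert(0) below): it enumerates the directory, spawns a
 * "-resume-file" process for every additional file it finds, and restores
 * the first file into the current process.
 */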
int init_from_checkpoint_file (const char * filename,
                               struct newproc_cp_header * hdr,
                               void ** cpptr)
{
    struct shim_dentry * dir = NULL;
    int ret;

    /* XXX: Not sure what to do here yet */
    assert(0);
    ret = path_lookupat(NULL, filename, LOOKUP_ACCESS|LOOKUP_DIRECTORY, &dir, NULL);
    if (ret < 0)
        return ret;

    struct shim_mount * fs = dir->fs;
    struct shim_dirent * dirent;

    if (!fs->d_ops || !fs->d_ops->readdir) {
        ret = -EACCES;
        goto out;
    }

    if ((ret = fs->d_ops->readdir(dir, &dirent)) < 0)
        goto out;

    struct shim_dentry * first = NULL;
    struct shim_dirent * d = dirent;

    for ( ; d ; d = d->next) {
        struct shim_dentry * file;

        if ((ret = lookup_dentry(dir, d->name, strlen(d->name), false,
                                 &file, dir->fs)) < 0)
            continue;

        if (file->state & DENTRY_NEGATIVE)
            continue;

        if (!first) {
            first = file;
            continue;
        }

        const char * argv[3];
        argv[0] = "-resume-file";
        argv[1] = dentry_get_path(file, true, NULL);
        argv[2] = 0;

        PAL_HANDLE proc = DkProcessCreate(NULL, argv);
        if (!proc) {
            ret = -PAL_ERRNO;
            goto out;
        }

        put_dentry(file);
    }

    if (first) {
        ret = restore_from_file(dentry_get_path(first, true, NULL), hdr, cpptr);
        put_dentry(first);
    }

    free(dirent);
out:
    put_dentry(dir);
    return ret;
}
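
/*
 * Restore a checkpoint image from a file: read the cp_header, then mmap the
 * rest of the file at the address recorded in that header.
 */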
int restore_from_file (const char * filename, struct newproc_cp_header * hdr,
                       void ** cpptr)
{
    struct shim_handle * file = get_new_handle();
    if (!file)
        return -ENOMEM;

    int ret = open_namei(file, NULL, filename, O_RDWR, 0, NULL);
    if (ret < 0) {
        put_handle(file);
        return ret;
    }

    struct shim_mount * fs = file->fs;
    open_handle(file);

    debug("restore %s\n", filename);

    struct cp_header cphdr;
    ret = fs->fs_ops->read(file, &cphdr, sizeof(struct cp_header));
    if (ret < 0)
        goto out;

    void * cpaddr = cphdr.addr;
    ret = fs->fs_ops->mmap(file, &cpaddr, ALIGN_UP(cphdr.size),
                           PROT_READ|PROT_WRITE,
                           MAP_PRIVATE|MAP_FILE, 0);
    if (ret < 0)
        goto out;

    hdr->hdr = cphdr;
    *cpptr = cpaddr;
    migrated_memory_start = cpaddr;
    migrated_memory_end = cpaddr + hdr->hdr.size;
out:
    close_handle(file);
    return ret;
}
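
/*
 * PAL handles (e.g. sockets and RPC streams) cannot be serialized into the
 * checkpoint. They are migrated separately: the parent sends each handle
 * recorded in a palhdl entry with DkSendHandle, and the child receives them
 * with DkReceiveHandle and stores them through the recorded phandle pointer.
 */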
int send_handles_on_stream (PAL_HANDLE stream, struct shim_cp_store * store)
{
    int nentries = store->palhdl_nentries;
    if (!nentries)
        return 0;

    struct shim_palhdl_entry ** entries =
            __alloca(sizeof(struct shim_palhdl_entry *) * nentries);

    struct shim_palhdl_entry * entry = store->last_palhdl_entry;
    int cnt = nentries;

    for ( ; entry ; entry = entry->prev)
        if (entry->handle) {
            if (!cnt)
                return -EINVAL;
            entries[--cnt] = entry;
        }

    entries += cnt;
    nentries -= cnt;

    for (int i = 0 ; i < nentries ; i++)
        if (!DkSendHandle(stream, entries[i]->handle))
            entries[i]->handle = NULL;

    return 0;
}

int receive_handles_on_stream (struct palhdl_header * hdr, ptr_t base,
                               long rebase)
{
    struct shim_palhdl_entry * palhdl_entries =
            (void *) (base + hdr->entoffset);
    int nentries = hdr->nentries;
    if (!nentries)
        return 0;

    debug("receive handles: %d entries\n", nentries);

    struct shim_palhdl_entry ** entries =
            __alloca(sizeof(struct shim_palhdl_entry *) * nentries);

    struct shim_palhdl_entry * entry = palhdl_entries;
    int cnt = nentries;

    for ( ; entry ; entry = entry->prev) {
        CP_REBASE(entry->prev);
        CP_REBASE(entry->phandle);
        if (!cnt)
            return -EINVAL;
        entries[--cnt] = entry;
    }

    entries += cnt;
    nentries -= cnt;

    for (int i = 0 ; i < nentries ; i++) {
        entry = entries[i];
        if (entry->handle) {
            PAL_HANDLE hdl = DkReceiveHandle(PAL_CB(parent_process));
            if (hdl) {
                *entry->phandle = hdl;
                continue;
            }
        }
    }

    return 0;
}
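
/*
 * Allocator callback for the checkpoint store: bookkeep a VMA for the store
 * (extending in place when addr is given) and back it with PAL memory.
 */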
static void * cp_alloc (struct shim_cp_store * store, void * addr, size_t size)
{
    if (addr) {
        /*
         * If the checkpoint needs more space, try to extend the checkpoint
         * store at the current address.
         */
        debug("try extend checkpoint store: %p-%p (size = %ld)\n",
              addr, addr + size, size);

        if (bkeep_mmap(addr, size, PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                       NULL, 0, "cpstore") < 0)
            return NULL;
    } else {
        /*
         * Here we use a strategy to reduce internal fragmentation of the
         * virtual memory space. Because we need a relatively large,
         * contiguous space for dumping the checkpoint data, internal
         * fragmentation can exhaust the virtual address space after forking
         * a few times. The space previously used for a checkpoint may be
         * fragmented by the next fork.
         *
         * A simple trick we use here is to reserve some space right after
         * the checkpoint space. The reserved space is half the size of the
         * checkpoint space, but can be further fine-tuned.
         */
        size_t reserve_size = ALIGN_UP(size >> 1);

        debug("try allocate checkpoint store (size = %ld, reserve = %ld)\n",
              size, reserve_size);

        /*
         * Allocate the checkpoint space at the first space found from the
         * top of the virtual address space.
         */
        addr = bkeep_unmapped_any(size + reserve_size, PROT_READ|PROT_WRITE,
                                  CP_VMA_FLAGS, NULL, 0, "cpstore");
        if (!addr)
            return NULL;

        bkeep_munmap(addr + size, reserve_size, CP_VMA_FLAGS);
    }

    addr = (void *) DkVirtualMemoryAlloc(addr, size, 0,
                                         PAL_PROT_READ|PAL_PROT_WRITE);
    if (!addr)
        bkeep_munmap(addr, size, CP_VMA_FLAGS);

    return addr;
}
DEFINE_PROFILE_CATAGORY(migrate_proc, migrate);
DEFINE_PROFILE_INTERVAL(migrate_create_process, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_create_gipc, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_connect_ipc, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_init_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_save_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_header, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_checkpoint, migrate_proc);
DEFINE_PROFILE_OCCURENCE(migrate_send_on_stream, migrate_proc);
DEFINE_PROFILE_OCCURENCE(migrate_send_gipc_pages, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_pal_handles, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_free_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_wait_response, migrate_proc);

static bool warn_no_gipc __attribute_migratable = true;

/*
 * Create a new process and migrate the process states to the new process.
 *
 * @migrate: migration function defined by the caller
 * @exec: the executable to load in the new process
 * @argv: arguments passed to the new process
 * @thread: thread handle to be migrated to the new process
 *
 * The remaining arguments are passed into the migration function.
 */
int do_migrate_process (int (*migrate) (struct shim_cp_store *,
                                        struct shim_thread *,
                                        struct shim_process *, va_list),
                        struct shim_handle * exec,
                        const char ** argv,
                        struct shim_thread * thread, ...)
{
    int ret = 0;
    struct shim_process * new_process = NULL;
    struct newproc_header hdr;
    int bytes;
    memset(&hdr, 0, sizeof(hdr));

#ifdef PROFILE
    unsigned long begin_create_time = GET_PROFILE_INTERVAL();
    unsigned long create_time = begin_create_time;
#endif
    BEGIN_PROFILE_INTERVAL();
    /*
     * Create the process first. The new process requires some time
     * to initialize before it can start receiving checkpoint data.
     * Parallelizing process creation and checkpointing improves
     * the latency of forking.
     */
    PAL_HANDLE proc = DkProcessCreate(exec ? qstrgetstr(&exec->uri) :
                                      pal_control.executable, argv);

    if (!proc) {
        ret = -PAL_ERRNO;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_create_process);
    /*
     * Detect whether GIPC is supported by the host. If GIPC is not supported,
     * forking may be slow because we have to use RPC streams to migrate
     * user memory.
     */
    bool use_gipc = false;
    PAL_NUM gipc_key;
    PAL_HANDLE gipc_hdl = DkCreatePhysicalMemoryChannel(&gipc_key);

    if (gipc_hdl) {
        debug("created gipc store: gipc:%lu\n", gipc_key);
        use_gipc = true;
        SAVE_PROFILE_INTERVAL(migrate_create_gipc);
    } else {
        if (warn_no_gipc) {
            warn_no_gipc = false;
            sys_printf("WARNING: no physical memory support, process creation "
                       "may be slow.\n");
        }
    }

    /* Create process and IPC bookkeepings */
    if (!(new_process = create_new_process(true))) {
        ret = -ENOMEM;
        goto err;
    }

    if (!(new_process->self = create_ipc_port(0, false))) {
        ret = -EACCES;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_connect_ipc);

    /* Allocate a space for dumping the checkpoint data. */
    struct shim_cp_store cpstore;
    memset(&cpstore, 0, sizeof(cpstore));
    cpstore.alloc = cp_alloc;
    cpstore.use_gipc = use_gipc;
    cpstore.bound = CP_INIT_VMA_SIZE;

    while (1) {
        /*
         * Try allocating a space of a certain size. If the allocation fails,
         * continue to try with smaller sizes.
         */
        cpstore.base = (ptr_t) cp_alloc(&cpstore, 0, cpstore.bound);
        if (cpstore.base)
            break;

        cpstore.bound >>= 1;
        if (cpstore.bound < allocsize)
            break;
    }

    if (!cpstore.base) {
        ret = -ENOMEM;
        debug("failed creating checkpoint store\n");
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_init_checkpoint);

    /* Calling the migration function defined by the caller. */
    va_list ap;
    va_start(ap, thread);
    ret = (*migrate) (&cpstore, thread, new_process, ap);
    va_end(ap);

    if (ret < 0) {
        debug("failed creating checkpoint (ret = %d)\n", ret);
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_save_checkpoint);

    unsigned long checkpoint_time = GET_PROFILE_INTERVAL();
    unsigned long checkpoint_size = cpstore.offset + cpstore.mem_size;

    /* Checkpoint data created. */
  816. debug("checkpoint of %lu bytes created, %lu microsecond is spent.\n",
  817. checkpoint_size, checkpoint_time);
    hdr.checkpoint.hdr.addr = (void *) cpstore.base;
    hdr.checkpoint.hdr.size = checkpoint_size;

    if (cpstore.mem_nentries) {
        hdr.checkpoint.mem.entoffset =
                (ptr_t) cpstore.last_mem_entry - cpstore.base;
        hdr.checkpoint.mem.nentries = cpstore.mem_nentries;
    }

    if (cpstore.use_gipc) {
        snprintf(hdr.checkpoint.gipc.uri, sizeof(hdr.checkpoint.gipc.uri),
                 "gipc:%lld", gipc_key);

        if (cpstore.gipc_nentries) {
            hdr.checkpoint.gipc.entoffset =
                    (ptr_t) cpstore.last_gipc_entry - cpstore.base;
            hdr.checkpoint.gipc.nentries = cpstore.gipc_nentries;
        }
    }

    if (cpstore.palhdl_nentries) {
        hdr.checkpoint.palhdl.entoffset =
                (ptr_t) cpstore.last_palhdl_entry - cpstore.base;
        hdr.checkpoint.palhdl.nentries = cpstore.palhdl_nentries;
    }

#ifdef PROFILE
    hdr.begin_create_time = begin_create_time;
    hdr.create_time = create_time;
    hdr.write_proc_time = GET_PROFILE_INTERVAL();
#endif

    /*
     * Sending a header to the new process through the RPC stream to
     * notify the process to start receiving the checkpoint.
     */
    bytes = DkStreamWrite(proc, 0, sizeof(struct newproc_header), &hdr, NULL);
    if (!bytes) {
        ret = -PAL_ERRNO;
        debug("failed writing to process stream (ret = %d)\n", ret);
        goto err;
    } else if (bytes < sizeof(struct newproc_header)) {
        ret = -EACCES;
        goto err;
    }

    ADD_PROFILE_OCCURENCE(migrate_send_on_stream, bytes);
    SAVE_PROFILE_INTERVAL(migrate_send_header);

    /* Sending the checkpoint either through GIPC or the RPC stream */
    ret = cpstore.use_gipc ? send_checkpoint_by_gipc(gipc_hdl, &cpstore) :
          send_checkpoint_on_stream(proc, &cpstore);

    if (ret < 0) {
        debug("failed sending checkpoint (ret = %d)\n", ret);
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_send_checkpoint);

    /*
     * For socket and RPC streams, we need to migrate the PAL handles
     * to the new process using PAL calls.
     */
    if ((ret = send_handles_on_stream(proc, &cpstore)) < 0)
        goto err;

    SAVE_PROFILE_INTERVAL(migrate_send_pal_handles);

    /* Free the checkpoint space */
    if ((ret = bkeep_munmap((void *) cpstore.base, cpstore.bound,
                            CP_VMA_FLAGS)) < 0) {
  877. debug("failed unmaping checkpoint (ret = %d)\n", ret);
        goto err;
    }

    DkVirtualMemoryFree((PAL_PTR) cpstore.base, cpstore.bound);

    SAVE_PROFILE_INTERVAL(migrate_free_checkpoint);

    /* Wait for the response from the new process */
    struct newproc_response res;
    bytes = DkStreamRead(proc, 0, sizeof(struct newproc_response), &res,
                         NULL, 0);
    if (bytes == 0) {
        ret = -PAL_ERRNO;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_wait_response);

    if (gipc_hdl)
        DkObjectClose(gipc_hdl);

    /* Notify the namespace manager regarding the subleasing of TID */
    ipc_pid_sublease_send(res.child_vmid, thread->tid,
                          qstrgetstr(&new_process->self->uri),
                          NULL);

    /* Listen on the RPC stream to the new process */
    add_ipc_port_by_id(res.child_vmid, proc,
                       IPC_PORT_DIRCLD|IPC_PORT_LISTEN|IPC_PORT_KEEPALIVE,
                       &ipc_child_exit,
                       NULL);

    destroy_process(new_process);
    return 0;
err:
    if (gipc_hdl)
        DkObjectClose(gipc_hdl);
    if (proc)
        DkObjectClose(proc);
    if (new_process)
        destroy_process(new_process);

    sys_printf("process creation failed\n");
    return ret;
}
/*
 * Load the checkpoint from the parent process or a checkpoint file.
 *
 * @hdr: checkpoint header
 * @cpptr: returns the address of the loaded checkpoint
 */
int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
{
    void * base = NULL;
    size_t size = hdr->hdr.size;
    PAL_PTR mapaddr;
    PAL_NUM mapsize;
    long rebase;
    bool use_gipc = !!hdr->gipc.uri[0];
    PAL_HANDLE gipc_store;
    int ret = 0;
    BEGIN_PROFILE_INTERVAL();

    /*
     * Allocate a large enough space to load the checkpoint data.
     *
     * If CPSTORE_DERANDOMIZATION is enabled, try to allocate the space
     * at the exact address where the checkpoint is created. Otherwise,
     * just allocate at the first space we found from the top of the virtual
     * memory space.
     */
#if CPSTORE_DERANDOMIZATION == 1
    if (hdr->hdr.addr
        && lookup_overlap_vma(hdr->hdr.addr, size, NULL) == -ENOENT) {
        /* Try to load the checkpoint at the same address */
        base = hdr->hdr.addr;
        mapaddr = (PAL_PTR) ALIGN_DOWN(base);
        mapsize = (PAL_PTR) ALIGN_UP(base + size) - mapaddr;

        /* Need to create VMA before allocation */
        ret = bkeep_mmap((void *) mapaddr, mapsize,
                         PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                         NULL, 0, "cpstore");
        if (ret < 0)
            base = NULL;
    }
#endif

    if (!base) {
        base = bkeep_unmapped_any(ALIGN_UP(size),
                                  PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                                  NULL, 0, "cpstore");
        if (!base)
            return -ENOMEM;

        mapaddr = (PAL_PTR) base;
        mapsize = (PAL_NUM) ALIGN_UP(size);
    }

    debug("checkpoint mapped at %p-%p\n", base, base + size);

    PAL_FLG pal_prot = PAL_PROT_READ|PAL_PROT_WRITE;
    PAL_PTR mapped = mapaddr;

    if (use_gipc) {
        debug("open gipc store: %s\n", hdr->gipc.uri);

        gipc_store = DkStreamOpen(hdr->gipc.uri, 0, 0, 0, 0);
        if (!gipc_store ||
            !DkPhysicalMemoryMap(gipc_store, 1, &mapped, &mapsize, &pal_prot))
            return -PAL_ERRNO;

        SAVE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc);
    } else {
        mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0, pal_prot);
        if (!mapped)
            return -PAL_ERRNO;
    }

    assert(mapaddr == mapped);

    /*
     * If the checkpoint is loaded at a different address from where it is
     * created, we need to rebase the pointers in the checkpoint.
     */
    rebase = (long) ((uintptr_t) base - (uintptr_t) hdr->hdr.addr);

    /* Load the memory data sent separately over GIPC or the RPC stream. */
    if (use_gipc) {
        if ((ret = restore_gipc(gipc_store, &hdr->gipc, (ptr_t) base, rebase)) < 0)
            return ret;

        SAVE_PROFILE_INTERVAL(child_load_memory_by_gipc);
        DkStreamDelete(gipc_store, 0);
    } else {
        int total_bytes = 0;
        while (total_bytes < size) {
            int bytes = DkStreamRead(PAL_CB(parent_process), 0,
                                     size - total_bytes,
                                     (void *) base + total_bytes, NULL, 0);
            if (!bytes) {
                if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                    PAL_ERRNO == EWOULDBLOCK)
                    continue;
                return -PAL_ERRNO;
            }

            total_bytes += bytes;
        }

        SAVE_PROFILE_INTERVAL(child_load_checkpoint_on_pipe);
        debug("%d bytes read on stream\n", total_bytes);
    }

    /* Receive socket or RPC handles from the parent process. */
    ret = receive_handles_on_stream(&hdr->palhdl, (ptr_t) base, rebase);
    if (ret < 0) {
        /* TODO: unload the checkpoint space */
        return ret;
    }

    SAVE_PROFILE_INTERVAL(child_receive_handles);

    migrated_memory_start = (void *) mapaddr;
    migrated_memory_end = (void *) mapaddr + mapsize;
    *cpptr = (void *) base;
    return 0;
}
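
/*
 * Resume execution of a restored thread: load the saved general-purpose
 * registers from shim_regs, stash the return address just below the red
 * zone of the target stack, then switch stacks and pop the registers in
 * inline assembly. %rax is zeroed before the final jump.
 */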
void restore_context (struct shim_context * context)
{
    int nregs = sizeof(struct shim_regs) / sizeof(void *);
    void * regs[nregs + 1];

    if (context->regs)
        memcpy(regs, context->regs, sizeof(struct shim_regs));
    else
        memset(regs, 0, sizeof(struct shim_regs));

    debug("restore context: SP = %p, IP = %p\n", context->sp, context->ret_ip);

    regs[nregs] = (void *) context->sp;

    /* don't clobber redzone. If sigaltstack is used,
     * this area won't be clobbered by signal context */
    *(void **) (context->sp - 128 - 8) = context->ret_ip;

    /* Ready to resume execution, re-enable preemption. */
    shim_tcb_t * tcb = shim_get_tls();
    __enable_preempt(tcb);

    memset(context, 0, sizeof(struct shim_context));

    __asm__ volatile("movq %0, %%rsp\r\n"
                     "popq %%r15\r\n"
                     "popq %%r14\r\n"
                     "popq %%r13\r\n"
                     "popq %%r12\r\n"
                     "popq %%r11\r\n"
                     "popq %%r10\r\n"
                     "popq %%r9\r\n"
                     "popq %%r8\r\n"
                     "popq %%rcx\r\n"
                     "popq %%rdx\r\n"
                     "popq %%rsi\r\n"
                     "popq %%rdi\r\n"
                     "popq %%rbx\r\n"
                     "popq %%rbp\r\n"
                     "popfq\r\n"
                     "popq %%rsp\r\n"
                     "movq $0, %%rax\r\n"
                     "jmp *-128-8(%%rsp)\r\n"
                     :: "g"(&regs) : "memory");
}