/* Copyright (C) 2014 Stony Brook University
   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
/*
 * shim_checkpoint.c
 *
 * This file contains code for the checkpoint / migration scheme of the
 * library OS.
 */
#include "asm-offsets.h"
#include <shim_internal.h>
#include <shim_utils.h>
#include <shim_thread.h>
#include <shim_handle.h>
#include <shim_vma.h>
#include <shim_fs.h>
#include <shim_checkpoint.h>
#include <shim_ipc.h>
#include <shim_profile.h>

#include <pal.h>
#include <pal_error.h>
#include <list.h>

#include <stdarg.h>
#include <asm/fcntl.h>
#include <asm/mman.h>

DEFINE_PROFILE_CATEGORY(migrate, );
DEFINE_PROFILE_CATEGORY(checkpoint, migrate);
DEFINE_PROFILE_INTERVAL(checkpoint_create_map, checkpoint);
DEFINE_PROFILE_INTERVAL(checkpoint_copy, checkpoint);
DEFINE_PROFILE_CATEGORY(checkpoint_func, checkpoint);
DEFINE_PROFILE_INTERVAL(checkpoint_destroy_map, checkpoint);

DEFINE_PROFILE_OCCURENCE(checkpoint_count, checkpoint);
DEFINE_PROFILE_OCCURENCE(checkpoint_total_size, checkpoint);

DEFINE_PROFILE_CATEGORY(resume, migrate);
DEFINE_PROFILE_INTERVAL(child_created_in_new_process, resume);
DEFINE_PROFILE_INTERVAL(child_wait_header, resume);
DEFINE_PROFILE_INTERVAL(child_receive_header, resume);
DEFINE_PROFILE_INTERVAL(do_migration, resume);
DEFINE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc, resume);
DEFINE_PROFILE_INTERVAL(child_load_memory_by_gipc, resume);
DEFINE_PROFILE_INTERVAL(child_load_checkpoint_on_pipe, resume);
DEFINE_PROFILE_INTERVAL(child_receive_handles, resume);
DEFINE_PROFILE_INTERVAL(restore_checkpoint, resume);
DEFINE_PROFILE_CATEGORY(resume_func, resume);
DEFINE_PROFILE_INTERVAL(child_total_migration_time, resume);
#define CP_HASH_SIZE 256
#define CP_HASH(addr) ((hashfunc((ptr_t)(addr))) & (CP_HASH_SIZE - 1))

typedef uint16_t FASTHASHTYPE;

#define CP_MAP_ENTRY_NUM 64

DEFINE_LIST(cp_map_entry);
struct cp_map_entry
{
    LIST_TYPE(cp_map_entry) hlist;
    struct shim_cp_map_entry entry;
};

DEFINE_LISTP(cp_map_entry);
struct cp_map {
    struct cp_map_buffer {
        struct cp_map_buffer * next;
        int num, cnt;
        struct cp_map_entry entries[0];
    } * buffers;

    struct hash_map {
        LISTP_TYPE(cp_map_entry) head[CP_HASH_SIZE];
    } map;
};
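
/*
 * Map from the original address of a checkpointed object to its offset in the
 * checkpoint store, used to avoid checkpointing the same object twice.
 * Entries are carved out of cp_map_buffer chunks; the first buffer is part of
 * the same allocation as the map itself (see create_cp_map/destroy_cp_map).
 */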
void * create_cp_map (void)
{
    void * data = malloc(sizeof(struct cp_map) + sizeof(struct cp_map_buffer) +
                         sizeof(struct cp_map_entry) * CP_MAP_ENTRY_NUM);
    if (!data)
        return NULL;

    struct cp_map * map = (struct cp_map *) data;
    struct cp_map_buffer * buffer =
            (struct cp_map_buffer *) (data + sizeof(struct cp_map));

    memset(map, 0, sizeof(*map));
    map->buffers = buffer;
    buffer->next = NULL;
    buffer->num = CP_MAP_ENTRY_NUM;
    buffer->cnt = 0;
    return (void *) map;
}

void destroy_cp_map (void * map)
{
    struct cp_map * m = (struct cp_map *) map;
    struct cp_map_buffer * buffer = m->buffers, * next;

    for (next = buffer ? buffer->next : NULL ;
         buffer && next ;
         buffer = next, next = next ? next->next : NULL)
        free(buffer);

    free(m);
}

static inline
struct cp_map_buffer * extend_cp_map (struct cp_map * map)
{
    struct cp_map_buffer * buffer =
            malloc(sizeof(struct cp_map_buffer) +
                   sizeof(struct cp_map_entry) * CP_MAP_ENTRY_NUM);
    if (!buffer)
        return NULL;

    buffer->next = map->buffers;
    map->buffers = buffer;
    buffer->num = CP_MAP_ENTRY_NUM;
    buffer->cnt = 0;
    return buffer;
}
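
/*
 * Find the map entry for an object address; if `create` is true and no entry
 * exists, allocate one from the current buffer (extending the buffer chain
 * when it is full) and link it into the hash bucket.
 */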
struct shim_cp_map_entry *
get_cp_map_entry (void * map, void * addr, bool create)
{
    struct cp_map * m = (struct cp_map *) map;
    FASTHASHTYPE hash = CP_HASH(addr);
    LISTP_TYPE(cp_map_entry) * head = &m->map.head[hash];
    struct cp_map_entry * tmp;
    struct shim_cp_map_entry * e = NULL;

    LISTP_FOR_EACH_ENTRY(tmp, head, hlist)
        if (tmp->entry.addr == addr)
            e = &tmp->entry;

    if (create && !e) {
        struct cp_map_buffer * buffer = m->buffers;

        if (buffer->cnt == buffer->num) {
            buffer = extend_cp_map(m);
            if (!buffer)
                return NULL;
        }

        struct cp_map_entry * new = &buffer->entries[buffer->cnt++];
        INIT_LIST_HEAD(new, hlist);
        LISTP_ADD(new, head, hlist);
        new->entry.addr = addr;
        new->entry.off = 0;
        e = &new->entry;
    }

    return e;
}
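
/* CP_FUNC(memory): record a raw memory range to be sent along with the
 * checkpoint. Only the bookkeeping entry is created here; the actual data is
 * copied or transmitted later by the send_checkpoint_* functions. */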
BEGIN_CP_FUNC(memory)
{
    struct shim_mem_entry * entry =
            (void *) (base + ADD_CP_OFFSET(sizeof(struct shim_mem_entry)));

    entry->addr = obj;
    entry->size = size;
    entry->paddr = NULL;
    entry->prot = PAL_PROT_READ|PAL_PROT_WRITE;
    entry->data = NULL;
    entry->prev = store->last_mem_entry;
    store->last_mem_entry = entry;
    store->mem_nentries++;
    store->mem_size += size;

    if (objp)
        *objp = entry;
}
END_CP_FUNC_NO_RS(memory)

BEGIN_CP_FUNC(palhdl)
{
    ptr_t off = ADD_CP_OFFSET(sizeof(struct shim_palhdl_entry));
    struct shim_palhdl_entry * entry = (void *) (base + off);

    entry->handle = (PAL_HANDLE) obj;
    entry->uri = NULL;
    entry->phandle = NULL;
    entry->prev = store->last_palhdl_entry;
    store->last_palhdl_entry = entry;
    store->palhdl_nentries++;

    ADD_CP_FUNC_ENTRY(off);

    if (objp)
        *objp = entry;
}
END_CP_FUNC(palhdl)

BEGIN_RS_FUNC(palhdl)
{
    struct shim_palhdl_entry * ent = (void *) (base + GET_CP_FUNC_ENTRY());
    /* Check the pointed-to handle slot, not the pointer itself. */
    if (ent->phandle && !*ent->phandle && ent->uri) {
        /* XXX: reopen the stream */
    }
}
END_RS_FUNC(palhdl)
BEGIN_CP_FUNC(migratable)
{
    struct shim_mem_entry * mem_entry;

    DO_CP_SIZE(memory, &__migratable, &__migratable_end - &__migratable,
               &mem_entry);

    struct shim_cp_entry * entry = ADD_CP_FUNC_ENTRY(0UL);
    mem_entry->paddr = (void **) &entry->cp_un.cp_val;
}
END_CP_FUNC(migratable)

BEGIN_RS_FUNC(migratable)
{
    void * data = (void *) GET_CP_FUNC_ENTRY();
    CP_REBASE(data);
    memcpy(&__migratable, data, &__migratable_end - &__migratable);
}
END_RS_FUNC(migratable)

BEGIN_CP_FUNC(environ)
{
    const char ** e, ** envp = (void *) obj;
    int nenvp = 0;
    int envp_bytes = 0;

    for (e = envp ; *e ; e++) {
        nenvp++;
        envp_bytes += strlen(*e) + 1;
    }

    ptr_t off = ADD_CP_OFFSET(sizeof(char *) * (nenvp + 1) + envp_bytes);
    const char ** new_envp = (void *) base + off;
    char * ptr = (void *) base + off + sizeof(char *) * (nenvp + 1);

    for (int i = 0 ; i < nenvp ; i++) {
        int len = strlen(envp[i]);
        new_envp[i] = ptr;
        memcpy(ptr, envp[i], len + 1);
        ptr += len + 1;
    }

    new_envp[nenvp] = NULL;

    ADD_CP_FUNC_ENTRY(off);
}
END_CP_FUNC(environ)

BEGIN_RS_FUNC(environ)
{
    const char ** envp = (void *) base + GET_CP_FUNC_ENTRY();
    const char ** e;

    for (e = envp ; *e ; e++) {
        CP_REBASE(*e);
        DEBUG_RS("%s", *e);
    }

    initial_envp = envp;
}
END_RS_FUNC(environ)

BEGIN_CP_FUNC(qstr)
{
    struct shim_qstr * qstr = (struct shim_qstr *) obj;

    if (qstr->len < QSTR_SIZE) {
        if (qstr->oflow) {
            memcpy(qstr->name, qstr->oflow, qstr->len + 1);
            qstr->oflow = NULL;
        }
    } else {
        struct shim_str * str =
                (void *) (base + ADD_CP_OFFSET(qstr->len + 1));
        memcpy(str, qstr->oflow, qstr->len + 1);
        qstr->oflow = str;
        ADD_CP_FUNC_ENTRY((ptr_t) qstr - base);
    }
}
END_CP_FUNC(qstr)

BEGIN_RS_FUNC(qstr)
{
    struct shim_qstr * qstr = (void *) (base + GET_CP_FUNC_ENTRY());
    CP_REBASE(qstr->oflow);
}
END_RS_FUNC(qstr)

BEGIN_CP_FUNC(gipc)
{
    ptr_t off = ADD_CP_OFFSET(sizeof(struct shim_gipc_entry));

    void * send_addr = (void *) ALIGN_DOWN(obj);
    size_t send_size = (void *) ALIGN_UP(obj + size) - send_addr;

    struct shim_gipc_entry * entry = (void *) (base + off);

    entry->mem.addr = send_addr;
    entry->mem.size = send_size;
    entry->mem.prot = PAL_PROT_READ|PAL_PROT_WRITE;
    entry->mem.prev = (void *) store->last_gipc_entry;
    store->last_gipc_entry = entry;
    store->gipc_nentries++;

#if HASH_GIPC == 1
    struct md5_ctx ctx;
    md5_init(&ctx);
    md5_update(&ctx, send_addr, allocsize);
    md5_final(&ctx);
    entry->first_hash = *(unsigned long *) ctx.digest;
#endif /* HASH_GIPC == 1 */

    ADD_CP_FUNC_ENTRY(off);

    if (objp)
        *objp = entry;
}
END_CP_FUNC(gipc)

BEGIN_RS_FUNC(gipc)
{
#if HASH_GIPC == 1
    struct shim_gipc_entry * entry = (void *) (base + GET_CP_FUNC_ENTRY());
    PAL_FLG pal_prot = PAL_PROT(entry->prot, 0);
    if (!(pal_prot & PAL_PROT_READ))
        DkVirtualMemoryProtect(entry->addr, entry->npages * allocsize,
                               pal_prot|PAL_PROT_READ);

    struct md5_ctx ctx;
    md5_init(&ctx);
    md5_update(&ctx, entry->addr, allocsize);
    md5_final(&ctx);
    assert(*(unsigned long *) ctx.digest == entry->first_hash);

    if (!(pal_prot & PAL_PROT_READ))
        DkVirtualMemoryProtect(entry->addr, entry->npages * allocsize,
                               pal_prot);
#endif /* HASH_GIPC == 1 */
}
END_RS_FUNC(gipc)
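
/* Send a checkpoint to the child process over a GIPC (bulk physical memory)
 * store: copy any out-of-band memory ranges into the checkpoint area, commit
 * the checkpoint pages, then commit all recorded GIPC memory ranges. */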
static int send_checkpoint_by_gipc (PAL_HANDLE gipc_store,
                                    struct shim_cp_store * store)
{
    PAL_PTR hdr_addr = (PAL_PTR) store->base;
    PAL_NUM hdr_size = (PAL_NUM) store->offset + store->mem_size;
    assert(ALIGNED(hdr_addr));

    int mem_nentries = store->mem_nentries;

    if (mem_nentries) {
        struct shim_mem_entry ** mem_entries =
                __alloca(sizeof(struct shim_mem_entry *) * mem_nentries);
        int mem_cnt = mem_nentries;
        struct shim_mem_entry * mem_ent = store->last_mem_entry;

        for (; mem_ent ; mem_ent = mem_ent->prev) {
            if (!mem_cnt)
                return -EINVAL;
            mem_entries[--mem_cnt] = mem_ent;
        }

        mem_entries  += mem_cnt;
        mem_nentries -= mem_cnt;

        for (int i = 0 ; i < mem_nentries ; i++) {
            void * mem_addr = (void *) store->base +
                              __ADD_CP_OFFSET(mem_entries[i]->size);

            assert(store->offset <= hdr_size);
            memcpy(mem_addr, mem_entries[i]->addr, mem_entries[i]->size);
            mem_entries[i]->data = mem_addr;
        }
    }

    hdr_size = ALIGN_UP(hdr_size);
    int npages = DkPhysicalMemoryCommit(gipc_store, 1, &hdr_addr, &hdr_size);
    if (!npages)
        return -EPERM;

    int nentries = store->gipc_nentries;
    PAL_PTR * gipc_addrs = __alloca(sizeof(PAL_PTR) * nentries);
    PAL_NUM * gipc_sizes = __alloca(sizeof(PAL_NUM) * nentries);
    int total_pages = 0;
    int cnt = nentries;
    struct shim_gipc_entry * ent = store->last_gipc_entry;

    for (; ent ; ent = (void *) ent->mem.prev) {
        if (!cnt)
            return -EINVAL;
        cnt--;
        gipc_addrs[cnt] = ent->mem.addr;
        gipc_sizes[cnt] = ent->mem.size;
        total_pages += ent->mem.size / allocsize;
    }

    gipc_addrs += cnt;
    gipc_sizes += cnt;
    nentries   -= cnt;

    /* Chia-Che: sending an empty page can't ever be a smart idea.
       we might rather fail here */
    npages = DkPhysicalMemoryCommit(gipc_store, nentries, gipc_addrs,
                                    gipc_sizes);

    if (npages < total_pages) {
        debug("gipc supposed to send %d pages, but only %d pages sent\n",
              total_pages, npages);
        return -ENOMEM;
    }

    ADD_PROFILE_OCCURENCE(migrate_send_gipc_pages, npages);
    return 0;
}
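
/* Send a checkpoint over the RPC stream (the fallback when GIPC is not
 * available): write the checkpoint area and then each recorded memory range,
 * retrying on EINTR/EAGAIN/EWOULDBLOCK. */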
static int send_checkpoint_on_stream (PAL_HANDLE stream,
                                      struct shim_cp_store * store)
{
    int mem_nentries = store->mem_nentries;
    struct shim_mem_entry ** mem_entries;

    if (mem_nentries) {
        mem_entries = __alloca(sizeof(struct shim_mem_entry *) * mem_nentries);
        int mem_cnt = mem_nentries;
        struct shim_mem_entry * mem_ent = store->last_mem_entry;

        for (; mem_ent ; mem_ent = mem_ent->prev) {
            if (!mem_cnt)
                return -EINVAL;
            mem_entries[--mem_cnt] = mem_ent;
        }

        void * mem_addr = (void *) store->base + store->offset;
        mem_entries  += mem_cnt;
        mem_nentries -= mem_cnt;

        for (int i = 0 ; i < mem_nentries ; i++) {
            int mem_size = mem_entries[i]->size;
            mem_entries[i]->data = mem_addr;
            mem_addr += mem_size;
        }
    }

    size_t total_bytes = store->offset;
    size_t bytes = 0;

    do {
        size_t ret = DkStreamWrite(stream, 0, total_bytes - bytes,
                                   (void *) store->base + bytes, NULL);

        if (!ret) {
            if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                PAL_ERRNO == EWOULDBLOCK)
                continue;
            return -PAL_ERRNO;
        }

        bytes += ret;
    } while (bytes < total_bytes);

    ADD_PROFILE_OCCURENCE(migrate_send_on_stream, total_bytes);

    for (int i = 0 ; i < mem_nentries ; i++) {
        size_t mem_size = mem_entries[i]->size;
        void * mem_addr = mem_entries[i]->addr;

        bytes = 0;
        do {
            size_t ret = DkStreamWrite(stream, 0, mem_size - bytes,
                                       mem_addr + bytes, NULL);
            if (!ret) {
                if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                    PAL_ERRNO == EWOULDBLOCK)
                    continue;
                return -PAL_ERRNO;
            }

            bytes += ret;
        } while (bytes < mem_entries[i]->size);

        if (!(mem_entries[i]->prot & PAL_PROT_READ))
            DkVirtualMemoryProtect(mem_addr, mem_size, mem_entries[i]->prot);

        mem_entries[i]->size = mem_size;
        ADD_PROFILE_OCCURENCE(migrate_send_on_stream, mem_size);
    }

    return 0;
}
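
/* In the child process: map the memory ranges recorded in the GIPC header
 * back into this address space with DkPhysicalMemoryMap(), after rebasing the
 * pointers in the received entry list. */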
static int restore_gipc (PAL_HANDLE gipc, struct gipc_header * hdr, ptr_t base,
                         long rebase)
{
    struct shim_gipc_entry * gipc_entries = (void *) (base + hdr->entoffset);
    int nentries = hdr->nentries;

    if (!nentries)
        return 0;

    debug("restore memory by gipc: %d entries\n", nentries);

    struct shim_gipc_entry ** entries =
            __alloca(sizeof(struct shim_gipc_entry *) * nentries);

    struct shim_gipc_entry * entry = gipc_entries;
    int cnt = nentries;

    while (entry) {
        CP_REBASE(entry->mem.prev);
        CP_REBASE(entry->mem.paddr);
        if (!cnt)
            return -EINVAL;
        entries[--cnt] = entry;
        entry = (void *) entry->mem.prev;
    }

    entries  += cnt;
    nentries -= cnt;

    PAL_PTR * addrs = __alloca(sizeof(PAL_PTR) * nentries);
    PAL_NUM * sizes = __alloca(sizeof(PAL_NUM) * nentries);
    PAL_FLG * prots = __alloca(sizeof(PAL_FLG) * nentries);

    for (int i = 0 ; i < nentries ; i++) {
        addrs[i] = entries[i]->mem.paddr ? NULL : (PAL_PTR) entries[i]->mem.addr;
        sizes[i] = entries[i]->mem.size;
        prots[i] = entries[i]->mem.prot;
    }

    if (!DkPhysicalMemoryMap(gipc, nentries, addrs, sizes, prots))
        return -PAL_ERRNO;

    for (int i = 0 ; i < nentries ; i++)
        if (entries[i]->mem.paddr)
            *(void **) entries[i]->mem.paddr = (void *) addrs[i];

    return 0;
}
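
/* Restore a checkpoint loaded at `base`: first re-create and fill the raw
 * memory entries, then dispatch each checkpoint entry of CP_FUNC type to its
 * matching restore (RS) function. If `type` is nonzero, only entries of that
 * type are restored. */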
int restore_checkpoint (struct cp_header * cphdr, struct mem_header * memhdr,
                        ptr_t base, ptr_t type)
{
    ptr_t cpoffset = cphdr->offset;
    ptr_t * offset = &cpoffset;
    long rebase = base - (ptr_t) cphdr->addr;
    int ret = 0;

    if (type)
        debug("restore checkpoint at 0x%08lx rebased from %p (%s only)\n",
              base, cphdr->addr, CP_FUNC_NAME(type));
    else
        debug("restore checkpoint at 0x%08lx rebased from %p\n",
              base, cphdr->addr);

    if (memhdr && memhdr->nentries) {
        struct shim_mem_entry * entry =
                (void *) (base + memhdr->entoffset);

        for (; entry ; entry = entry->prev) {
            CP_REBASE(entry->prev);
            CP_REBASE(entry->paddr);

            if (entry->paddr) {
                *entry->paddr = entry->data;
            } else {
                debug("memory entry [%p]: %p-%p\n", entry, entry->addr,
                      entry->addr + entry->size);

                PAL_PTR addr = ALIGN_DOWN(entry->addr);
                PAL_NUM size = ALIGN_UP(entry->addr + entry->size) -
                               (void *) addr;
                PAL_FLG prot = entry->prot;

                if (!DkVirtualMemoryAlloc(addr, size, 0, prot|PAL_PROT_WRITE)) {
                    debug("failed allocating %p-%p\n", addr, addr + size);
                    return -PAL_ERRNO;
                }

                CP_REBASE(entry->data);
                memcpy(entry->addr, entry->data, entry->size);

                if (!(entry->prot & PAL_PROT_WRITE) &&
                    !DkVirtualMemoryProtect(addr, size, prot)) {
                    debug("failed protecting %p-%p (ignored)\n", addr, addr + size);
                }
            }
        }
    }

    struct shim_cp_entry * cpent = NEXT_CP_ENTRY();

    while (cpent) {
        if (cpent->cp_type < CP_FUNC_BASE)
            goto next;
        if (type && cpent->cp_type != type)
            goto next;

        rs_func rs = (&__rs_func)[cpent->cp_type - CP_FUNC_BASE];
        ret = (*rs) (cpent, base, offset, rebase);
        if (ret < 0) {
            SYS_PRINTF("restore_checkpoint() at %s (%d)\n",
                       CP_FUNC_NAME(cpent->cp_type), ret);
            return ret;
        }
next:
        cpent = NEXT_CP_ENTRY();
    }
  507. debug("successfully restore checkpoint loaded at 0x%08lx - 0x%08lx\n",
  508. base, base + cphdr->size);
  509. return 0;
  510. }
int init_from_checkpoint_file (const char * filename,
                               struct newproc_cp_header * hdr,
                               void ** cpptr)
{
    struct shim_dentry * dir = NULL;
    int ret;

    /* XXX: Not sure what to do here yet */
    assert(0);

    ret = path_lookupat(NULL, filename, LOOKUP_ACCESS|LOOKUP_DIRECTORY, &dir, NULL);
    if (ret < 0)
        return ret;

    struct shim_mount * fs = dir->fs;
    struct shim_dirent * dirent;

    if (!fs->d_ops || !fs->d_ops->readdir) {
        ret = -EACCES;
        goto out;
    }

    if ((ret = fs->d_ops->readdir(dir, &dirent)) < 0)
        goto out;

    struct shim_dentry * first = NULL;
    struct shim_dirent * d = dirent;

    for ( ; d ; d = d->next) {
        struct shim_dentry * file;
        if ((ret = lookup_dentry(dir, d->name, strlen(d->name), false,
                                 &file, dir->fs)) < 0)
            continue;
        if (file->state & DENTRY_NEGATIVE)
            continue;

        if (!first) {
            first = file;
            continue;
        }

        const char * argv[3];
        argv[0] = "-resume-file";
        argv[1] = dentry_get_path(file, true, NULL);
        argv[2] = 0;

        PAL_HANDLE proc = DkProcessCreate(NULL, argv);
        if (!proc) {
            ret = -PAL_ERRNO;
            goto out;
        }

        put_dentry(file);
    }

    if (first) {
        ret = restore_from_file(dentry_get_path(first, true, NULL), hdr, cpptr);
        put_dentry(first);
    }

    free(dirent);
out:
    put_dentry(dir);
    return ret;
}
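
/* Map a checkpoint file into memory and return its header and load address. */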
int restore_from_file (const char * filename, struct newproc_cp_header * hdr,
                       void ** cpptr)
{
    struct shim_handle * file = get_new_handle();
    if (!file)
        return -ENOMEM;

    int ret = open_namei(file, NULL, filename, O_RDWR, 0, NULL);
    if (ret < 0) {
        put_handle(file);
        return ret;
    }

    struct shim_mount * fs = file->fs;
    open_handle(file);
    debug("restore %s\n", filename);

    struct cp_header cphdr;
    ret = fs->fs_ops->read(file, &cphdr, sizeof(struct cp_header));
    if (ret < 0)
        goto out;

    void * cpaddr = cphdr.addr;
    ret = fs->fs_ops->mmap(file, &cpaddr, ALIGN_UP(cphdr.size),
                           PROT_READ|PROT_WRITE,
                           MAP_PRIVATE|MAP_FILE, 0);
    if (ret < 0)
        goto out;

    hdr->hdr = cphdr;
    *cpptr = cpaddr;
    migrated_memory_start = cpaddr;
    migrated_memory_end = cpaddr + hdr->hdr.size;
out:
    close_handle(file);
    return ret;
}
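
/* Migrate the recorded PAL handles (e.g. stream handles) to the new process
 * via DkSendHandle(); entries that fail to send have their handle cleared. */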
int send_handles_on_stream (PAL_HANDLE stream, struct shim_cp_store * store)
{
    int nentries = store->palhdl_nentries;
    if (!nentries)
        return 0;

    struct shim_palhdl_entry ** entries =
            __alloca(sizeof(struct shim_palhdl_entry *) * nentries);

    struct shim_palhdl_entry * entry = store->last_palhdl_entry;
    int cnt = nentries;

    for ( ; entry ; entry = entry->prev)
        if (entry->handle) {
            if (!cnt)
                return -EINVAL;
            entries[--cnt] = entry;
        }

    entries  += cnt;
    nentries -= cnt;

    for (int i = 0 ; i < nentries ; i++)
        if (!DkSendHandle(stream, entries[i]->handle))
            entries[i]->handle = NULL;

    return 0;
}
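
/* In the child process: receive the PAL handles sent by the parent and store
 * them into the placeholder locations recorded in the checkpoint. */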
int receive_handles_on_stream (struct palhdl_header * hdr, ptr_t base,
                               long rebase)
{
    struct shim_palhdl_entry * palhdl_entries =
            (void *) (base + hdr->entoffset);

    int nentries = hdr->nentries;
    if (!nentries)
        return 0;

    debug("receive handles: %d entries\n", nentries);

    struct shim_palhdl_entry ** entries =
            __alloca(sizeof(struct shim_palhdl_entry *) * nentries);

    struct shim_palhdl_entry * entry = palhdl_entries;
    int cnt = nentries;

    for ( ; entry ; entry = entry->prev) {
        CP_REBASE(entry->prev);
        CP_REBASE(entry->phandle);
        if (!cnt)
            return -EINVAL;
        entries[--cnt] = entry;
    }

    entries  += cnt;
    nentries -= cnt;

    for (int i = 0 ; i < nentries ; i++) {
        entry = entries[i];
        if (entry->handle) {
            PAL_HANDLE hdl = DkReceiveHandle(PAL_CB(parent_process));
            if (hdl) {
                *entry->phandle = hdl;
                continue;
            }
        }
    }

    return 0;
}
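
/* Allocation callback for the checkpoint store: either extend the store at
 * its current address, or reserve a fresh region (plus some slack to reduce
 * fragmentation) near the top of the address space. */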
static void * cp_alloc (struct shim_cp_store * store, void * addr, size_t size)
{
    if (addr) {
        /*
         * If the checkpoint needs more space, try to extend the checkpoint
         * store at the current address.
         */
        debug("try extend checkpoint store: %p-%p (size = %ld)\n",
              addr, addr + size, size);

        if (bkeep_mmap(addr, size, PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                       NULL, 0, "cpstore") < 0)
            return NULL;
    } else {
        /*
         * Here we use a strategy to reduce internal fragmentation of virtual
         * memory space. Because we need a relatively large, contiguous space
         * for dumping the checkpoint data, internal fragmentation can cause
         * the process to drain the virtual address space after forking a few
         * times. The space previously used for a checkpoint may be fragmented
         * by the next fork.
         *
         * A simple trick we use here is to reserve some space right after the
         * checkpoint space. The reserved space is half of the size of the
         * checkpoint space, but can be further fine-tuned.
         */
        size_t reserve_size = ALIGN_UP(size >> 1);

        debug("try allocate checkpoint store (size = %ld, reserve = %ld)\n",
              size, reserve_size);

        /*
         * Allocate the checkpoint space at the first address found from the
         * top of the virtual address space.
         */
        addr = bkeep_unmapped_any(size + reserve_size, PROT_READ|PROT_WRITE,
                                  CP_VMA_FLAGS, NULL, 0, "cpstore");
        if (!addr)
            return NULL;

        bkeep_munmap(addr + size, reserve_size, CP_VMA_FLAGS);
    }

    void * bkept_addr = addr;
    addr = (void *) DkVirtualMemoryAlloc(addr, size, 0,
                                         PAL_PROT_READ|PAL_PROT_WRITE);
    if (!addr)
        /* Allocation failed; release the VMA that was bookkept above. */
        bkeep_munmap(bkept_addr, size, CP_VMA_FLAGS);

    return addr;
}
DEFINE_PROFILE_CATEGORY(migrate_proc, migrate);
DEFINE_PROFILE_INTERVAL(migrate_create_process, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_create_gipc, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_connect_ipc, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_init_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_save_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_header, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_checkpoint, migrate_proc);
DEFINE_PROFILE_OCCURENCE(migrate_send_on_stream, migrate_proc);
DEFINE_PROFILE_OCCURENCE(migrate_send_gipc_pages, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_send_pal_handles, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_free_checkpoint, migrate_proc);
DEFINE_PROFILE_INTERVAL(migrate_wait_response, migrate_proc);

static bool warn_no_gipc __attribute_migratable = true;

/*
 * Create a new process and migrate the process states to the new process.
 *
 * @migrate: migration function defined by the caller
 * @exec: the executable to load in the new process
 * @argv: arguments passed to the new process
 * @thread: thread handle to be migrated to the new process
 *
 * The remaining arguments are passed into the migration function.
 */
int do_migrate_process (int (*migrate) (struct shim_cp_store *,
                                        struct shim_thread *,
                                        struct shim_process *, va_list),
                        struct shim_handle * exec,
                        const char ** argv,
                        struct shim_thread * thread, ...)
{
    int ret = 0;
    struct shim_process * new_process = NULL;
    struct newproc_header hdr;
    size_t bytes;
    memset(&hdr, 0, sizeof(hdr));

#ifdef PROFILE
    unsigned long begin_create_time = GET_PROFILE_INTERVAL();
    unsigned long create_time = begin_create_time;
#endif
    BEGIN_PROFILE_INTERVAL();
    /*
     * Create the process first. The new process requires some time
     * to initialize before starting to receive checkpoint data.
     * Parallelizing the process creation and checkpointing can improve
     * the latency of forking.
     */
    PAL_HANDLE proc = DkProcessCreate(exec ? qstrgetstr(&exec->uri) :
                                      pal_control.executable, argv);
    if (!proc) {
        ret = -PAL_ERRNO;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_create_process);

    /*
     * Detect if GIPC is supported by the host. If GIPC is not supported,
     * forking may be slow because we have to use RPC streams for migrating
     * user memory.
     */
    bool use_gipc = false;
    PAL_NUM gipc_key;
    PAL_HANDLE gipc_hdl = DkCreatePhysicalMemoryChannel(&gipc_key);

    if (gipc_hdl) {
        debug("created gipc store: gipc:%lu\n", gipc_key);
        use_gipc = true;
        SAVE_PROFILE_INTERVAL(migrate_create_gipc);
    } else {
        if (warn_no_gipc) {
            warn_no_gipc = false;
            SYS_PRINTF("WARNING: no physical memory support, process creation "
                       "may be slow.\n");
        }
    }

    /* Create process and IPC bookkeepings */
    if (!(new_process = create_new_process(true))) {
        ret = -ENOMEM;
        goto err;
    }

    if (!(new_process->self = create_ipc_port(0, false))) {
        ret = -EACCES;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_connect_ipc);

    /* Allocate a space for dumping the checkpoint data. */
    struct shim_cp_store cpstore;
    memset(&cpstore, 0, sizeof(cpstore));
    cpstore.alloc = cp_alloc;
    cpstore.use_gipc = use_gipc;
    cpstore.bound = CP_INIT_VMA_SIZE;

    while (1) {
        /*
         * Try allocating a space of a certain size. If the allocation fails,
         * continue to try with smaller sizes.
         */
        cpstore.base = (ptr_t) cp_alloc(&cpstore, 0, cpstore.bound);
        if (cpstore.base)
            break;

        cpstore.bound >>= 1;
        if (cpstore.bound < allocsize)
            break;
    }

    if (!cpstore.base) {
        ret = -ENOMEM;
        debug("failed creating checkpoint store\n");
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_init_checkpoint);

    /* Calling the migration function defined by the caller. */
    va_list ap;
    va_start(ap, thread);
    ret = (*migrate) (&cpstore, thread, new_process, ap);
    va_end(ap);

    if (ret < 0) {
        debug("failed creating checkpoint (ret = %d)\n", ret);
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_save_checkpoint);

    unsigned long checkpoint_time = GET_PROFILE_INTERVAL();
    unsigned long checkpoint_size = cpstore.offset + cpstore.mem_size;

    /* Checkpoint data created. */
  815. debug("checkpoint of %lu bytes created, %lu microsecond is spent.\n",
  816. checkpoint_size, checkpoint_time);
    hdr.checkpoint.hdr.addr = (void *) cpstore.base;
    hdr.checkpoint.hdr.size = checkpoint_size;

    if (cpstore.mem_nentries) {
        hdr.checkpoint.mem.entoffset =
                (ptr_t) cpstore.last_mem_entry - cpstore.base;
        hdr.checkpoint.mem.nentries = cpstore.mem_nentries;
    }

    if (cpstore.use_gipc) {
        snprintf(hdr.checkpoint.gipc.uri, sizeof(hdr.checkpoint.gipc.uri),
                 "gipc:%lld", gipc_key);

        if (cpstore.gipc_nentries) {
            hdr.checkpoint.gipc.entoffset =
                    (ptr_t) cpstore.last_gipc_entry - cpstore.base;
            hdr.checkpoint.gipc.nentries = cpstore.gipc_nentries;
        }
    }

    if (cpstore.palhdl_nentries) {
        hdr.checkpoint.palhdl.entoffset =
                (ptr_t) cpstore.last_palhdl_entry - cpstore.base;
        hdr.checkpoint.palhdl.nentries = cpstore.palhdl_nentries;
    }

#ifdef PROFILE
    hdr.begin_create_time = begin_create_time;
    hdr.create_time = create_time;
    hdr.write_proc_time = GET_PROFILE_INTERVAL();
#endif

    /*
     * Sending a header to the new process through the RPC stream to
     * notify the process to start receiving the checkpoint.
     */
    bytes = DkStreamWrite(proc, 0, sizeof(struct newproc_header), &hdr, NULL);
    if (!bytes) {
        ret = -PAL_ERRNO;
        debug("failed writing to process stream (ret = %d)\n", ret);
        goto err;
    } else if (bytes < sizeof(struct newproc_header)) {
        ret = -EACCES;
        goto err;
    }

    ADD_PROFILE_OCCURENCE(migrate_send_on_stream, bytes);
    SAVE_PROFILE_INTERVAL(migrate_send_header);

    /* Sending the checkpoint either through GIPC or the RPC stream */
    ret = cpstore.use_gipc ? send_checkpoint_by_gipc(gipc_hdl, &cpstore) :
          send_checkpoint_on_stream(proc, &cpstore);

    if (ret < 0) {
        debug("failed sending checkpoint (ret = %d)\n", ret);
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_send_checkpoint);

    /*
     * For socket and RPC streams, we need to migrate the PAL handles
     * to the new process using PAL calls.
     */
    if ((ret = send_handles_on_stream(proc, &cpstore)) < 0)
        goto err;

    SAVE_PROFILE_INTERVAL(migrate_send_pal_handles);

    /* Free the checkpoint space */
    if ((ret = bkeep_munmap((void *) cpstore.base, cpstore.bound,
                            CP_VMA_FLAGS)) < 0) {
  876. debug("failed unmaping checkpoint (ret = %d)\n", ret);
  877. goto err;
  878. }
    DkVirtualMemoryFree((PAL_PTR) cpstore.base, cpstore.bound);
    SAVE_PROFILE_INTERVAL(migrate_free_checkpoint);

    /* Wait for the response from the new process */
    struct newproc_response res;
    bytes = DkStreamRead(proc, 0, sizeof(struct newproc_response), &res,
                         NULL, 0);
    if (bytes == 0) {
        ret = -PAL_ERRNO;
        goto err;
    }

    SAVE_PROFILE_INTERVAL(migrate_wait_response);

    if (gipc_hdl)
        DkObjectClose(gipc_hdl);

    /* Notify the namespace manager regarding the subleasing of TID */
    ipc_pid_sublease_send(res.child_vmid, thread->tid,
                          qstrgetstr(&new_process->self->uri),
                          NULL);

    /* Listen on the RPC stream to the new process */
    add_ipc_port_by_id(res.child_vmid, proc,
                       IPC_PORT_DIRCLD|IPC_PORT_LISTEN|IPC_PORT_KEEPALIVE,
                       &ipc_child_exit,
                       NULL);

    destroy_process(new_process);
    return 0;
err:
    if (gipc_hdl)
        DkObjectClose(gipc_hdl);
    if (proc)
        DkObjectClose(proc);
    if (new_process)
        destroy_process(new_process);

    SYS_PRINTF("process creation failed\n");
    return ret;
}
/*
 * Load the checkpoint from the parent process or a checkpoint file.
 *
 * @hdr: checkpoint header
 * @cpptr: returns a pointer to the loaded checkpoint
 */
int do_migration (struct newproc_cp_header * hdr, void ** cpptr)
{
    void * base = NULL;
    size_t size = hdr->hdr.size;
    PAL_PTR mapaddr;
    PAL_NUM mapsize;
    long rebase;
    bool use_gipc = !!hdr->gipc.uri[0];
    PAL_HANDLE gipc_store;
    int ret = 0;

    BEGIN_PROFILE_INTERVAL();

    /*
     * Allocate a large enough space to load the checkpoint data.
     *
     * If CPSTORE_DERANDOMIZATION is enabled, try to allocate the space
     * at the exact address where the checkpoint is created. Otherwise,
     * just allocate at the first space we found from the top of the virtual
     * memory space.
     */
#if CPSTORE_DERANDOMIZATION == 1
    if (hdr->hdr.addr
        && lookup_overlap_vma(hdr->hdr.addr, size, NULL) == -ENOENT) {
        /* Try to load the checkpoint at the same address */
        base = hdr->hdr.addr;
        mapaddr = (PAL_PTR) ALIGN_DOWN(base);
        mapsize = (PAL_PTR) ALIGN_UP(base + size) - mapaddr;

        /* Need to create VMA before allocation */
        ret = bkeep_mmap((void *) mapaddr, mapsize,
                         PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                         NULL, 0, "cpstore");
        if (ret < 0)
            base = NULL;
    }
#endif

    if (!base) {
        base = bkeep_unmapped_any(ALIGN_UP(size),
                                  PROT_READ|PROT_WRITE, CP_VMA_FLAGS,
                                  NULL, 0, "cpstore");
        if (!base)
            return -ENOMEM;

        mapaddr = (PAL_PTR) base;
        mapsize = (PAL_NUM) ALIGN_UP(size);
    }

    debug("checkpoint mapped at %p-%p\n", base, base + size);

    PAL_FLG pal_prot = PAL_PROT_READ|PAL_PROT_WRITE;
    PAL_PTR mapped = mapaddr;

    if (use_gipc) {
        debug("open gipc store: %s\n", hdr->gipc.uri);

        gipc_store = DkStreamOpen(hdr->gipc.uri, 0, 0, 0, 0);
        if (!gipc_store ||
            !DkPhysicalMemoryMap(gipc_store, 1, &mapped, &mapsize, &pal_prot))
            return -PAL_ERRNO;

        SAVE_PROFILE_INTERVAL(child_load_checkpoint_by_gipc);
    } else {
        /* Assign to the outer `mapped` (do not shadow it), so the assertion
           below checks the address actually returned by the allocation. */
        mapped = DkVirtualMemoryAlloc(mapaddr, mapsize, 0, pal_prot);
        if (!mapped)
            return -PAL_ERRNO;
    }

    assert(mapaddr == mapped);
    /*
     * If the checkpoint is loaded at a different address from where it is
     * created, we need to rebase the pointers in the checkpoint.
     */
    rebase = (long) ((uintptr_t) base - (uintptr_t) hdr->hdr.addr);

    /* Load the memory data sent separately over GIPC or the RPC stream. */
    if (use_gipc) {
        if ((ret = restore_gipc(gipc_store, &hdr->gipc, (ptr_t) base, rebase)) < 0)
            return ret;

        SAVE_PROFILE_INTERVAL(child_load_memory_by_gipc);
        DkStreamDelete(gipc_store, 0);
    } else {
        size_t total_bytes = 0;

        while (total_bytes < size) {
            int bytes = DkStreamRead(PAL_CB(parent_process), 0,
                                     size - total_bytes,
                                     (void *) base + total_bytes, NULL, 0);
            if (!bytes) {
                if (PAL_ERRNO == EINTR || PAL_ERRNO == EAGAIN ||
                    PAL_ERRNO == EWOULDBLOCK)
                    continue;
                return -PAL_ERRNO;
            }

            total_bytes += bytes;
        }

        SAVE_PROFILE_INTERVAL(child_load_checkpoint_on_pipe);
        debug("%lu bytes read on stream\n", total_bytes);
    }

    /* Receive socket or RPC handles from the parent process. */
    ret = receive_handles_on_stream(&hdr->palhdl, (ptr_t) base, rebase);
    if (ret < 0) {
        /* TODO: unload the checkpoint space */
        return ret;
    }

    SAVE_PROFILE_INTERVAL(child_receive_handles);

    migrated_memory_start = (void *) mapaddr;
    migrated_memory_end = (void *) mapaddr + mapsize;
    *cpptr = (void *) base;
    return 0;
}
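
/* Resume execution from a saved register set: restore the general-purpose
 * registers and flags from `regs`, then jump to the saved instruction
 * pointer, which is stashed just below the red zone of the target stack. */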
void restore_context (struct shim_context * context)
{
    assert(context->regs);
    struct shim_regs regs = *context->regs;
    debug("restore context: SP = 0x%08lx, IP = 0x%08lx\n", regs.rsp, regs.rip);

    /* don't clobber redzone. If sigaltstack is used,
     * this area won't be clobbered by signal context */
    *(unsigned long*) (regs.rsp - RED_ZONE_SIZE - 8) = regs.rip;

    /* Ready to resume execution, re-enable preemption. */
    shim_tcb_t * tcb = shim_get_tls();
    __enable_preempt(tcb);

    memset(context, 0, sizeof(struct shim_context));

    __asm__ volatile("movq %0, %%rsp\r\n"
                     "addq $2 * 8, %%rsp\r\n"    /* skip orig_rax and rsp */
                     "popq %%r15\r\n"
                     "popq %%r14\r\n"
                     "popq %%r13\r\n"
                     "popq %%r12\r\n"
                     "popq %%r11\r\n"
                     "popq %%r10\r\n"
                     "popq %%r9\r\n"
                     "popq %%r8\r\n"
                     "popq %%rcx\r\n"
                     "popq %%rdx\r\n"
                     "popq %%rsi\r\n"
                     "popq %%rdi\r\n"
                     "popq %%rbx\r\n"
                     "popq %%rbp\r\n"
                     "popfq\r\n"
                     "movq "XSTRINGIFY(SHIM_REGS_RSP)" - "XSTRINGIFY(SHIM_REGS_RIP)"(%%rsp), %%rsp\r\n"
                     "movq $0, %%rax\r\n"
                     "jmp *-"XSTRINGIFY(RED_ZONE_SIZE)"-8(%%rsp)\r\n"
                     :: "g"(&regs) : "memory");
}