shim_checkpoint.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_checkpoints.c
  17. *
  18. * This file contains definitions and macros for checkpointing method.
  19. */
  20. #ifndef _SHIM_CHECKPOINT_H_
  21. #define _SHIM_CHECKPOINT_H_
  22. #include <shim_defs.h>
  23. #include <shim_ipc.h>
  24. #include <shim_profile.h>
  25. #include <pal.h>
  26. #include <stdarg.h>
  27. #ifdef __i386__
  28. typedef uint32_t ptr_t;
  29. # define hashfunc hash32
  30. #else
  31. typedef uint64_t ptr_t;
  32. # define hashfunc hash64
  33. #endif
  34. #define __attribute_migratable __attribute__((section(".migratable")))
  35. extern char __migratable;
  36. extern char __migratable_end;
  37. /* TSAI 7/11/2012:
  38. The migration scheme we are expecting is to support an easy syntax to
  39. implement migration procedure. A migration procedure can be written
40. in the following syntax:
  41. BEGIN_MIGRATE_DEFINITION(exec)
  42. {
  43. DEFINE_MIGRATE(thread, );
  44. DEFINE_MIGRATE(handle_map, );
  45. }
  46. void *checkpoint = DO_MIGRATE(exec);
  47. The structure of checkpoint data will be a counting-down stack-like
  48. memory segment, with enough space reserved below for 1. in case the
49. dry run miscalculates the checkpoint size or 2. stack use for the new
  50. thread.
  51. Below is the figure for our checkpoint structure:
  52. (later added by PAL: argc program arguments
  53. argv[0]
  54. argv[1]
  55. ...
  56. envp[0] env variables
  57. envp[1]
  58. ...
  59. NULL-end
  60. auxv[0] aux vectors
  61. auxv[1]
  62. ...
  63. auxv[n] AT_NULL
  64. Low Bytes -------------------------------------------------
  65. checkpoint base (identified by a magic number)
  66. -------------------------------------------------
  67. checkpoint_entry[0]
  68. checkpoint_entry[1]
  69. checkpoint_entry[2]
  70. ...
  71. checkpoint_entry[n] CP_NULL
  72. ------------------------------------------------
  73. data section for checkpoint 0
  74. data section for checkpoint 1
  75. data section for checkpoint 2
  76. ...
  77. data section for checkpoint n-1
  78. High Bytes ------------------------------------------------
  79. */
/* A single record in the checkpoint stream.  The checkpoint area begins
   with an array of these entries, terminated by a CP_NULL entry (see the
   layout figure above). */
struct shim_cp_entry
{
    ptr_t cp_type;     /* entry type: a CP_* constant or CP_FUNC_BASE + index */
    union
    {
        ptr_t cp_val;  /* integer value */
        /* originally there was a pointer here; it is no longer needed */
    } cp_un;
};
/* Describes a run of pages handed to the gipc (bulk IPC) channel instead of
   being copied into the checkpoint data section (see use_gipc in
   shim_cp_store and send_checkpoint_by_gipc below). */
struct shim_gipc_entry {
    struct shim_gipc_entry * next;  /* singly-linked list of gipc entries */
    /* how 'addr' is interpreted: absolute, relative to __load_address
       (see SET_GIPC_REL_ADDR), or "map anywhere" */
    enum { ABS_ADDR, REL_ADDR, ANY_ADDR } addr_type;
    void * addr;                    /* start address (or offset if REL_ADDR) */
    int npages;                     /* length of the range in pages */
    int prot;                       /* memory protection flags */
    struct shim_vma * vma;          /* associated VMA, if any */
#if HASH_GIPC == 1
    unsigned long first_hash;       /* hash of the first page — presumably an
                                       integrity check; confirm against users */
#endif
};
/* Convert (gipc)->addr from an absolute address into an offset relative to
   the library load address (__load_address) and mark the entry REL_ADDR. */
#define SET_GIPC_REL_ADDR(gipc)                                             \
    do {                                                                    \
        (gipc)->addr_type = REL_ADDR;                                       \
        (gipc)->addr = (void *) ((gipc)->addr - (void *) &__load_address);  \
    } while (0)
/* A raw memory range saved into (or to be re-created from) a checkpoint. */
struct shim_mem_entry {
    void * addr;                 /* start address of the range */
    int size;                    /* size in bytes */
    int prot;                    /* protection flags */
    bool need_alloc, need_prot;  /* restore-side flags — presumably whether the
                                    restorer must allocate / re-protect the
                                    range; confirm against restore code */
    struct shim_vma * vma;       /* associated VMA, if any */
    void * data;                 /* saved contents */
};
/* State of one in-progress checkpoint ("store").  Filled by START_MIGRATE. */
struct shim_cp_store {
    void * cpaddr;    /* start of the allocated checkpoint buffer */
    void * cpdata;    /* start of checkpoint data (cpaddr + preserved bytes) */
    size_t cpsize;    /* total buffer size in bytes */
    void * addr_map;  /* opaque address map (create_addr_map) used while
                         walking objects */
    /* out-of-band bulk transfer over gipc, if enabled */
    bool use_gipc;
    struct shim_gipc_entry * gipc_entries, * gipc_entries_tail;
    int gipc_nentries;
};
/* Reset the gipc-related bookkeeping of a checkpoint store. */
#define INIT_CP_STORE_GIPC(store)                   \
    do {                                            \
        (store)->use_gipc = false;                  \
        (store)->gipc_entries = NULL;               \
        (store)->gipc_entries_tail = NULL;          \
        (store)->gipc_nentries = 0;                 \
    } while (0)

/* Initialize a checkpoint store: no buffer yet, fresh address map. */
#define INIT_CP_STORE(store)                        \
    do {                                            \
        (store)->cpaddr = NULL;                     \
        (store)->cpdata = NULL;                     \
        (store)->cpsize = 0;                        \
        (store)->addr_map = create_addr_map();      \
        INIT_CP_STORE_GIPC(store);                  \
    } while (0)
/* Signature shared by every per-type migrate function:
     store     - the checkpoint store being filled
     ent       - in/out cursor into the entry array
     base      - base address of the checkpoint area (0 during a dry run)
     offset    - counting-down cursor into the data area
     obj, size - the object to checkpoint and its size in bytes
     objp      - optional out-pointer receiving the relocated address
     recursive, dry - traversal-mode flags
   Returns the number of bytes consumed (entries plus data). */
#define MIGRATE_FUNC_ARGS \
    struct shim_cp_store * store, struct shim_cp_entry ** ent, ptr_t base, \
    unsigned long * offset, void * obj, size_t size, void ** objp, \
    bool recursive, bool dry

#define MIGRATE_FUNC_RET size_t

/* Signature shared by every per-type resume function; cprebase is the
   relocation delta between checkpoint-time and restore-time addresses. */
#define RESUME_FUNC_ARGS \
    struct shim_cp_entry ** ent, ptr_t base, size_t cpsize, long cprebase

#define RESUME_FUNC_RET int

typedef MIGRATE_FUNC_RET (*migrate_func) (MIGRATE_FUNC_ARGS);
typedef RESUME_FUNC_RET (*resume_func) (RESUME_FUNC_ARGS);

/* Starts of the linker-collected tables of migrate/resume functions and
   their names (populated via DEFINE_MIGRATE_FUNC's section attributes). */
extern const char * __migrate_name;
extern const migrate_func __migrate_func;
extern const resume_func __resume_func;
/* Built-in checkpoint entry types. */
#define CP_NULL      0   /* terminator of the entry array */
#define CP_IGNORE    1
#define CP_BASE      2
#define CP_ADDR      3
#define CP_SIZE      4
#define CP_PID       5
#define CP_UID       6
#define CP_GID       7
#define CP_FD        8
#define CP_BOOL      9
#define CP_PALHDL   10
#define CP_FUNC_BASE 11  /* first per-type (DEFINE_MIGRATE_FUNC) entry type */

/* Index of migrate function 'name' within the linker-collected table
   starting at __migrate_func (the .migrate.* sections are presumably laid
   out contiguously by the linker script). */
#define CP_FUNC_INDEX(name) \
    ({ extern const migrate_func migrate_func_##name; \
       &migrate_func_##name - &__migrate_func; })

/* Entry type value used for checkpoints produced by migrate_<name>. */
#define CP_FUNC(name) CP_FUNC_BASE + CP_FUNC_INDEX(name)

/* Map an entry type back to its migrate function's name string. */
#define CP_FUNC_NAME(type) (&__migrate_name)[(type) - CP_FUNC_BASE]
/* ADD_ENTRY(type, value): append one shim_cp_entry of type CP_<type>
   carrying 'value'.  Relies on USED, ent, base and dry being in scope
   (set up by MIGRATE_FUNC_BODY).  A dry run only updates the USED byte
   count. */
#define ADD_ENTRY(type, value)                                  \
    do {                                                        \
        USED += sizeof(struct shim_cp_entry);                   \
        if (!dry) {                                             \
            struct shim_cp_entry * tmp = (*ent)++;              \
            tmp->cp_type = CP_##type;                           \
            tmp->cp_un.cp_val = (ptr_t) (value);                \
                                                                \
            if (DEBUG_CHECKPOINT)                               \
                debug("ADD CP_" #type "(%p) :%d\n",             \
                      tmp->cp_un.cp_val,                        \
                      tmp - (struct shim_cp_entry *) base);     \
        } else {                                                \
            if (DEBUG_CHECKPOINT)                               \
                debug("(dry) ADD CP_" #type "\n");              \
        }                                                       \
    } while(0)

/* ADD_OFFSET(size): reserve 'size' bytes (rounded up to 8-byte alignment)
   in the counting-down data area; the statement expression evaluates to
   the new offset (0 during a dry run). */
#define ADD_OFFSET(size)                                        \
    ({                                                          \
        int _size = ((size) + 7) & ~7;                          \
        USED += _size;                                          \
        if (!dry)                                               \
            *offset -= _size;                                   \
        if (DEBUG_CHECKPOINT)                                   \
            debug("%sADD OFFSET(%d)\n",                         \
                  dry ? "(dry) " : "", _size);                  \
        dry ? 0 : *offset;                                      \
    })
/* ADD_FUNC_ENTRY(value): like ADD_ENTRY, but tags the entry with the local
   CP_FUNC_TYPE (declared by MIGRATE_FUNC_BODY for the current function). */
#define ADD_FUNC_ENTRY(value)                                   \
    do {                                                        \
        USED += sizeof(struct shim_cp_entry);                   \
        if (!dry) {                                             \
            struct shim_cp_entry * tmp = (*ent)++;              \
            tmp->cp_type = CP_FUNC_TYPE;                        \
            tmp->cp_un.cp_val = (ptr_t) value;                  \
                                                                \
            if (DEBUG_CHECKPOINT)                               \
                debug("ADD CP_FUNC_%s(%p) :%d\n", CP_FUNC_NAME, \
                      tmp->cp_un.cp_val,                        \
                      tmp - (struct shim_cp_entry *) base);     \
        } else {                                                \
            if (DEBUG_CHECKPOINT)                               \
                debug("(dry) ADD CP_FUNC_%s\n", CP_FUNC_NAME);  \
        }                                                       \
    } while(0)

/* GET_ENTRY(type): advance *ent past entries until one of type CP_<type>
   is found; yields its value, leaving *ent just past it. */
#define GET_ENTRY(type)                                         \
    ({ struct shim_cp_entry * tmp = (*ent)++;                   \
                                                                \
       while (tmp->cp_type != CP_##type)                        \
           tmp = (*ent)++;                                      \
                                                                \
       /* debug("GET CP_" #type "(%p) :%d\n",                   \
                tmp->cp_un.cp_val,                              \
                tmp - (struct shim_cp_entry *) base); */        \
                                                                \
       tmp->cp_un.cp_val;                                       \
    })

/* GET_FUNC_ENTRY(): same scan, but matching the current function's
   CP_FUNC_TYPE. */
#define GET_FUNC_ENTRY()                                        \
    ({ struct shim_cp_entry * tmp = (*ent)++;                   \
                                                                \
       while (tmp->cp_type != CP_FUNC_TYPE)                     \
           tmp = (*ent)++;                                      \
                                                                \
       /* debug("GET CP_FUNC_%s(%p) :%d\n", CP_FUNC_NAME,       \
                tmp->cp_un.cp_val,                              \
                tmp - (struct shim_cp_entry *) base); */        \
                                                                \
       tmp->cp_un.cp_val;                                       \
    })
/* DEFINE_MIGRATE_FUNC(name): declare migrate_<name>/resume_<name> and emit
   pointers to them (plus the name string) into the .migrate.<name>,
   .resume.<name> and .migrate_name.<name> sections.  The linker script
   presumably gathers these into the parallel tables starting at
   __migrate_func / __resume_func / __migrate_name, which is what makes
   CP_FUNC_INDEX and CP_FUNC_NAME work.  Also declares the profiling
   intervals used by the FUNC_BODY macros below.
   NOTE(review): the macro ends with a line continuation, so it swallows
   the following (blank) line — keep a blank line after each use. */
#define DEFINE_MIGRATE_FUNC(name) \
    const char * migrate_name_##name \
        __attribute__((section(".migrate_name." #name))) = #name; \
    \
    extern MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS); \
    const migrate_func migrate_func_##name \
        __attribute__((section(".migrate." #name))) = &migrate_##name;\
    \
    extern RESUME_FUNC_RET resume_##name (RESUME_FUNC_ARGS); \
    const resume_func resume_func_##name \
        __attribute__((section(".resume." #name))) = &resume_##name;\
    \
    DEFINE_PROFILE_INTERVAL(migrate_##name, migrate_func); \
    DEFINE_PROFILE_INTERVAL(resume_##name, resume_func); \

/* MIGRATE_FUNC_BODY(name): open the body of migrate_<name>.  Declares the
   local CP_FUNC_TYPE / CP_FUNC_NAME used by ADD_FUNC_ENTRY and the USED
   byte counter that END_MIGRATE_FUNC returns. */
#define MIGRATE_FUNC_BODY(name) \
    MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS) \
    { \
        int CP_FUNC_TYPE __attribute__((unused)) \
            = CP_FUNC(name); \
        const char * CP_FUNC_NAME __attribute__((unused)) \
            = #name; \
        size_t USED = 0; \
        BEGIN_PROFILE_INTERVAL(); \
        ASSIGN_PROFILE_INTERVAL(migrate_##name);

/* Close a MIGRATE_FUNC_BODY: record profiling (real runs only) and return
   the number of bytes consumed. */
#define END_MIGRATE_FUNC \
        if (!dry) SAVE_PROFILE_INTERVAL_ASSIGNED(); \
        return USED; \
    }

/* RESUME_FUNC_BODY(name) / END_RESUME_FUNC: the same open/close pattern
   for resume_<name>; the closed function returns 0. */
#define RESUME_FUNC_BODY(name) \
    RESUME_FUNC_RET resume_##name (RESUME_FUNC_ARGS) \
    { \
        int CP_FUNC_TYPE __attribute__((unused)) \
            = CP_FUNC(name); \
        const char * CP_FUNC_NAME __attribute__((unused)) \
            = #name; \
        BEGIN_PROFILE_INTERVAL(); \
        ASSIGN_PROFILE_INTERVAL(resume_##name);

#define END_RESUME_FUNC \
        SAVE_PROFILE_INTERVAL_ASSIGNED(); \
        return 0; \
    }
/* RESUME_REBASE(obj): treat 'obj' as an array of pointer-sized slots and
   add the relocation delta 'cprebase' (in scope inside resume functions)
   to every non-NULL slot.  Only meaningful when every pointer-sized slot
   of the object really holds a pointer (or zero). */
#define RESUME_REBASE(obj)                                      \
    do {                                                        \
        void * _ptr = &(obj);                                   \
        size_t _size = sizeof(obj);                             \
        void ** _p;                                             \
        for (_p = _ptr ; _p < (void **)(_ptr + _size) ; _p++)   \
            if (*_p)                                            \
                *_p += cprebase;                                \
    } while (0)
/* One entry of the checkpoint address map: a source address mapped to its
   offset (and size) within the checkpoint data. */
struct shim_addr_map {
    ptr_t addr;
    unsigned long offset;  /* may carry MAP_UNALLOCATED/MAP_UNASSIGNED flags
                              while the entry is still being processed */
    size_t size;
};

void * create_addr_map (void);
void destroy_addr_map (void * map);

/* Look up the map entry covering [addr, addr+size); if 'create' is set,
   make one when none exists. */
struct shim_addr_map *
get_addr_map_entry (void * map, ptr_t addr, size_t size, bool create);
/* DO_MIGRATE_SIZE(name, obj, size, objp, recur): unconditionally invoke
   migrate_<name> with an explicit byte size (for variable-sized objects). */
#define DO_MIGRATE_SIZE(name, obj, size, objp, recur)               \
    do {                                                            \
        extern MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS); \
                                                                    \
        USED += migrate_##name (store, ent, base, offset,           \
                                obj, size, (void **) objp, recur, dry); \
    } while (0)

/* __DO_MIGRATE: same, but with the size inferred from the pointed-to type. */
#define __DO_MIGRATE(name, obj, objp, recur)                        \
    do {                                                            \
        extern MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS); \
                                                                    \
        USED += migrate_##name (store, ent, base, offset,           \
                                obj, sizeof(*(obj)), (void **) objp, recur, dry); \
    } while (0)
  309. #define DO_MIGRATE_MEMBER(name, obj, newobj, member, recur) \
  310. do { \
  311. typeof(obj->member) *(objp) = (newobj) ? \
  312. &(newobj)->member : NULL; \
  313. \
  314. DO_MIGRATE(name, (obj)->member, (objp), (recur)); \
  315. } while (0);
/* DO_MIGRATE(name, obj, objp, recur): checkpoint 'obj' unless it was
   already handled.  NULL objects are skipped.  If a finished map entry
   exists and no recursive walk is forced, only translate the pointer into
   the new checkpoint (base + offset) through objp.  Otherwise invoke
   migrate_<name>:
     - dry run:  when no map entry exists yet, or 'recur' forces a walk;
     - real run: when the dry run created a map entry (_e != NULL). */
#define DO_MIGRATE(name, obj, objp, recur)                              \
    do {                                                                \
        if (!obj)                                                       \
            break;                                                      \
                                                                        \
        struct shim_addr_map * _e = get_addr_map_entry (store->addr_map,\
                                        (ptr_t) (obj), sizeof(*(obj)), 0); \
                                                                        \
        if (_e && !ENTRY_JUST_CREATED(_e->offset) && !(recur))          \
        {                                                               \
            if (!dry && objp)                                           \
                *((typeof(obj) *) objp) = (typeof(obj))                 \
                                          (base + _e->offset);          \
            break;                                                      \
        }                                                               \
                                                                        \
        if (dry ? !_e || (recur) : _e != NULL)                          \
            __DO_MIGRATE(name, (obj), (objp), (recur));                 \
    } while (0)
  335. #define DO_MIGRATE_MEMBER_IF_RECURSIVE(name, obj, newobj, member, recur) \
  336. do { \
  337. typeof(obj->member) *(objp) = (newobj) ? \
  338. &(newobj)->member : NULL; \
  339. \
  340. DO_MIGRATE_IF_RECURSIVE(name, (obj)->member, (objp), (recur)); \
  341. } while (0);
/* DO_MIGRATE_IF_RECURSIVE(name, obj, objp, recur): like DO_MIGRATE, but
   the object is only followed during a recursive walk.  If no map entry
   exists and this is not a recursive pass, the output pointer is nulled
   and the object skipped.  A finished entry is translated in place, as in
   DO_MIGRATE. */
#define DO_MIGRATE_IF_RECURSIVE(name, obj, objp, recur)                 \
    do {                                                                \
        extern MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS);     \
        if (!obj)                                                       \
            break;                                                      \
                                                                        \
        struct shim_addr_map * _e = get_addr_map_entry (store->addr_map,\
                                        (ptr_t) (obj), sizeof(*(obj)), 0); \
                                                                        \
        if (!_e && !recursive)                                          \
        {                                                               \
            if (!dry && objp) *objp = NULL;                             \
            break;                                                      \
        }                                                               \
                                                                        \
        if (_e && !ENTRY_JUST_CREATED(_e->offset) && !(recur))          \
        {                                                               \
            if (!dry && objp)                                           \
                *((typeof(obj) *) objp) = (typeof(obj))                 \
                                          (base + _e->offset);          \
            break;                                                      \
        }                                                               \
                                                                        \
        /* 3 conditions in which we need to run a recursive search:     \
           _e  && !recursive && dry && recur                            \
           !_e && recursive  && dry                                     \
           _e  && !dry */                                               \
        if (dry ?                                                       \
            (_e ? !recursive && (recur) : recursive) : _e != NULL)      \
            __DO_MIGRATE(name, (obj), (objp), (recur));                 \
    } while (0)
/* DO_MIGRATE_IN_MEMBER(name, obj, newobj, member, recur): checkpoint via a
   pointer to the member itself — reading from the original object during
   the dry run and from the already-copied object during the real run. */
#define DO_MIGRATE_IN_MEMBER(name, obj, newobj, member, recur)  \
    __DO_MIGRATE(name, dry ? &(obj)->member : &(newobj)->member, \
                 NULL, (recur))
  376. #define CHECKPOINT_ADDR (NULL)
  377. #define MAP_UNALLOCATED 0x8000000000000000
  378. #define MAP_UNASSIGNED 0x4000000000000000
  379. #define MAP_UNUSABLE (MAP_UNALLOCATED|MAP_UNASSIGNED)
  380. #define ENTRY_JUST_CREATED(off) (off & MAP_UNUSABLE)
  381. static inline __attribute__((always_inline))
  382. ptr_t add_to_migrate_map (void * map, void * obj, ptr_t off,
  383. size_t size, bool dry)
  384. {
  385. struct shim_addr_map * e = get_addr_map_entry(map,
  386. (ptr_t) obj, size, 1);
  387. ptr_t result = e->offset;
  388. if (dry) {
  389. if (result & MAP_UNALLOCATED)
  390. e->offset = MAP_UNASSIGNED;
  391. else
  392. result = 0;
  393. } else {
  394. if (result & MAP_UNUSABLE) {
  395. assert(size);
  396. assert(off >= size);
  397. e->offset = off - size;
  398. e->size = size;
  399. }
  400. }
  401. return result;
  402. }
/* Convenience wrapper over add_to_migrate_map using the ambient
   store/dry variables of a migrate function body. */
#define ADD_TO_MIGRATE_MAP(obj, off, size) \
    add_to_migrate_map(store->addr_map, (obj), dry ? 0 : (off), (size), dry)

/* Arguments of a migration definition produced by BEGIN_MIGRATION_DEF. */
#define MIGRATE_DEF_ARGS \
    struct shim_cp_store * store, void * data, size_t size, bool dry

/* Open the nested (gcc 'auto') function migrate_def_<name> that drives a
   full migration; sets up the USED/offset/ent/base locals that
   DEFINE_MIGRATE and ADD_ENTRY rely on. */
#define BEGIN_MIGRATION_DEF(name, ...) \
    auto size_t migrate_def_##name (MIGRATE_DEF_ARGS, ##__VA_ARGS__) \
    { \
        size_t USED = 0; \
        unsigned long offset = size; \
        struct shim_cp_entry * ENTRY = (struct shim_cp_entry *) data; \
        struct shim_cp_entry * *ent = &ENTRY; \
        uintptr_t base = (uintptr_t) data;

/* Close the migration definition: append the CP_NULL terminator and
   return the total number of bytes used. */
#define END_MIGRATION_DEF \
        ADD_ENTRY(NULL, 0); \
        return USED; \
    }
/* DEFINE_MIGRATE(name, obj, size, recursive): invoke migrate_<name> from
   inside a BEGIN_MIGRATION_DEF body.  During a dry run base/offset are
   passed as 0/NULL, since only sizes are being counted. */
#define DEFINE_MIGRATE(name, obj, size, recursive) \
    do { \
        extern MIGRATE_FUNC_RET migrate_##name (MIGRATE_FUNC_ARGS); \
        \
        USED += migrate_##name(store, ent, dry ? 0 : base, \
                dry ? 0 : &offset, (obj), (size), NULL, recursive, dry); \
    } while (0)

/* Verbose-debugging switches for the resume and checkpoint paths. */
#define DEBUG_RESUME 0
#define DEBUG_CHECKPOINT 0

/* Allocator for the checkpoint buffer; overridable before inclusion. */
#ifndef malloc_method
#define malloc_method(size) system_malloc(size)
#endif
  431. #include <shim_profile.h>
/* START_MIGRATE(store, name, preserve, ...):
   Run migration definition migrate_def_<name> twice:
     1. a dry run to predict the checkpoint size, then
     2. a real run into a freshly allocated buffer of that size, with
        'preserve' bytes reserved at the front.
   On success fills (store)->cpaddr/cpdata/cpsize.  The statement
   expression evaluates to 0, or -ENOMEM if allocation fails.  The
   address map is destroyed on both paths. */
#define START_MIGRATE(store, name, preserve, ...) \
    ({ int _ret = 0; \
       do { \
           size_t size; \
           void * data; \
           \
           BEGIN_PROFILE_INTERVAL(); \
           \
           size = migrate_def_##name((store), NULL, 0, true, ##__VA_ARGS__) \
                  + (preserve); \
           SAVE_PROFILE_INTERVAL(checkpoint_predict_size); \
           ADD_PROFILE_OCCURENCE(checkpoint_total_size, size); \
           INC_PROFILE_OCCURENCE(checkpoint_count); \
           \
           data = malloc_method(size); \
           SAVE_PROFILE_INTERVAL(checkpoint_alloc_memory); \
           debug("allocate checkpoint: %p\n", data); \
           \
           if (!data) { \
               destroy_addr_map((store)->addr_map); \
               (store)->addr_map = NULL; \
               SAVE_PROFILE_INTERVAL(checkpoint_destroy_addr_map); \
               _ret = -ENOMEM; \
               break; \
           } \
           (store)->cpaddr = data; \
           (store)->cpdata = data + (preserve); \
           (store)->cpsize = size; \
           \
           migrate_def_##name((store), data + (preserve), size - (preserve), \
                              false, ##__VA_ARGS__); \
           SAVE_PROFILE_INTERVAL(checkpoint_copy_object); \
           debug("complete checkpointing data\n"); \
           \
           destroy_addr_map((store)->addr_map); \
           SAVE_PROFILE_INTERVAL(checkpoint_destroy_addr_map); \
       } while (0); \
       _ret; })
/* Header describing the checkpoint handed to a newly created process. */
struct newproc_cp_header {
    struct cp_header {
        unsigned long cpsize;    /* total size of the checkpoint data */
        void * cpaddr;           /* address the checkpoint was built at */
        unsigned long cpoffset;  /* offset of the data within the mapping —
                                    presumably; confirm against do_migration */
    } data;
    struct gipc_header {
        PAL_NUM gipc_key;             /* key identifying the gipc store */
        unsigned long gipc_entoffset; /* offset of the gipc entry list */
        int gipc_nentries;            /* number of gipc entries */
    } gipc;
};

/* First message sent to a newly created process. */
struct newproc_header {
    struct newproc_cp_header checkpoint;
    int failure;                  /* nonzero error code if creation failed */
#ifdef PROFILE
    unsigned long begin_create_time;
    unsigned long create_time;
    unsigned long write_proc_time;
#endif
};

/* Reply sent back from the child process. */
struct newproc_response {
    IDTYPE child_vmid;
    int failure;
};
/* Receive and map an incoming checkpoint described by 'hdr'; *cpptr is set
   to the mapped data. */
int do_migration (struct newproc_cp_header * hdr, void ** cpptr);

/* Replay the entry array in 'cpdata', invoking the matching resume
   functions. */
int restore_checkpoint (void * cpdata, struct cp_header * hdr, int type);

/* Map the pages transferred out-of-band over the given gipc handle. */
int restore_gipc (PAL_HANDLE gipc, struct gipc_header * hdr, void * cpdata,
                  long cprebase);

/* Push the bulk-memory portion of 'cpstore' through a gipc store. */
int send_checkpoint_by_gipc (PAL_HANDLE gipc_store,
                             struct shim_cp_store * cpstore);

/* Send the PAL handles referenced by the checkpoint over 'stream'. */
int send_handles_on_stream (PAL_HANDLE stream, void * cpdata);

/* Create a child process: the 'migrate' callback builds the checkpoint for
   the given executable/thread; extra arguments are forwarded via va_list. */
int do_migrate_process (int (*migrate) (struct shim_cp_store *,
                                        struct shim_process *,
                                        struct shim_thread *, va_list),
                        struct shim_handle * exec, const char ** argv,
                        struct shim_thread * thread, ...);

/* Load a checkpoint previously written to a file. */
int init_from_checkpoint_file (const char * filename,
                               struct newproc_cp_header * hdr,
                               void ** cpptr);
int restore_from_file (const char * filename, struct newproc_cp_header * hdr,
                       void ** cpptr);

/* Resume execution in a restored register/stack context. */
void restore_context (struct shim_context * context);

/* Session id value meaning "a checkpoint has been requested". */
#define CHECKPOINT_REQUESTED ((IDTYPE) -1)

int create_checkpoint (const char * cpdir, IDTYPE * session);
int join_checkpoint (struct shim_thread * cur, ucontext_t * context);
  516. #endif /* _SHIM_CHECKPOINT_H_ */