/* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
/* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */

/* Copyright (C) 2014 OSCAR lab, Stony Brook University

   This file is part of Graphene Library OS.

   Graphene Library OS is free software: you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   Graphene Library OS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
 * shim_vma.c
 *
 * This file contains the code to maintain the bookkeeping of VMAs in the
 * library OS.
 */

#include <shim_internal.h>
#include <shim_thread.h>
#include <shim_handle.h>
#include <shim_vma.h>
#include <shim_checkpoint.h>
#include <shim_fs.h>

#include <pal.h>
#include <linux_list.h>

#include <asm/mman.h>
#include <errno.h>

unsigned long mem_max_npages __attribute_migratable = DEFAULT_MEM_MAX_NPAGES;

static void * heap_top    __attribute_migratable;
static void * heap_bottom __attribute_migratable;

#define VMA_MGR_ALLOC   64
#define PAGE_SIZE       allocsize

static LOCKTYPE vma_mgr_lock;

#define system_lock()   lock(vma_mgr_lock)
#define system_unlock() unlock(vma_mgr_lock)
#define OBJ_TYPE struct shim_vma
#include <memmgr.h>

static MEM_MGR vma_mgr = NULL;

static LIST_HEAD(vma_list);
static LOCKTYPE vma_list_lock;

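/* Interval predicates on the sorted vma_list.  Each takes a vma and a range
 * [addr, addr + length) and tests, respectively, whether the vma equals,
 * contains, starts in, ends in, or overlaps that range. */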
static inline int test_vma_equal (struct shim_vma * tmp,
                                  const void * addr, size_t length)
{
    return tmp->addr == addr &&
           tmp->addr + tmp->length == addr + length;
}

static inline int test_vma_contain (struct shim_vma * tmp,
                                    const void * addr, size_t length)
{
    return tmp->addr <= addr &&
           tmp->addr + tmp->length >= addr + length;
}

static inline int test_vma_startin (struct shim_vma * tmp,
                                    const void * addr, size_t length)
{
    return tmp->addr >= addr &&
           tmp->addr < addr + length;
}

static inline int test_vma_endin (struct shim_vma * tmp,
                                  const void * addr, size_t length)
{
    return tmp->addr + tmp->length > addr &&
           tmp->addr + tmp->length <= addr + length;
}

static inline int test_vma_overlap (struct shim_vma * tmp,
                                    const void * addr, size_t length)
{
    return test_vma_contain (tmp, addr + 1, 0) ||
           test_vma_contain (tmp, addr + length - 1, 0) ||
           test_vma_startin (tmp, addr, length - 1);
}

int bkeep_shim_heap (void);
static void __set_heap_top (void * bottom, void * top);

int init_vma (void)
{
    if (!(vma_mgr = create_mem_mgr(init_align_up(VMA_MGR_ALLOC))))
        return -ENOMEM;

    heap_bottom = (void *) PAL_CB(user_address.start);
    if (heap_bottom + DEFAULT_HEAP_MIN_SIZE > PAL_CB(executable_range.start) &&
        heap_bottom < PAL_CB(executable_range.end))
        heap_bottom = (void *) ALIGN_UP(PAL_CB(executable_range.end));

    __set_heap_top(heap_bottom, (void *) PAL_CB(user_address.end));

    bkeep_shim_heap();
    create_lock(vma_list_lock);

    return 0;
}

/* Sanity check: the vma_list must stay sorted by address, with no
 * zero-length or overlapping entries. */
static inline void assert_vma (void)
{
    struct shim_vma * tmp;
    struct shim_vma * prev __attribute__((unused)) = NULL;

    list_for_each_entry(tmp, &vma_list, list) {
        /* Assert we are really sorted */
        assert(tmp->length > 0);
        assert(!prev || prev->addr + prev->length <= tmp->addr);
        prev = tmp;
    }
}

static struct shim_vma * __lookup_vma (const void * addr, size_t len);
static struct shim_vma * __lookup_supervma (const void * addr, size_t length,
                                            struct shim_vma ** prev);
static struct shim_vma * __lookup_overlap_vma (const void * addr, size_t length,
                                               struct shim_vma ** prev);

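/* VMA objects are reference counted.  get_vma()/put_vma() bump and drop the
 * count; once the count drops below one, the backing file handle (if any) is
 * released and the object is returned to the memory manager. */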
void get_vma (struct shim_vma * vma)
{
#ifdef DEBUG_REF
    int ref_count = REF_INC(vma->ref_count);

    debug("get vma %p(%p-%p) (ref_count = %d)\n", vma, vma->addr,
          vma->addr + vma->length, ref_count);
#else
    REF_INC(vma->ref_count);
#endif
}

void put_vma (struct shim_vma * vma)
{
    int ref_count = REF_DEC(vma->ref_count);

#ifdef DEBUG_REF
    debug("put vma %p(%p-%p) (ref_count = %d)\n", vma,
          vma->addr, vma->addr + vma->length, ref_count - 1);
#endif

    if (ref_count < 1) {
        if (vma->file)
            put_handle(vma->file);

        if (MEMORY_MIGRATED(vma))
            memset(vma, 0, sizeof(struct shim_vma));
        else
            free_mem_obj_to_mgr(vma_mgr, vma);
    }
}

static void __remove_vma (struct shim_vma * vma)
{
    list_del_init(&vma->list);
    put_vma(vma);
}

static int __bkeep_mmap (void * addr, size_t length,
                         int prot, int flags,
                         struct shim_handle * file, int offset,
                         const char * comment);

static int __bkeep_mprotect (void * addr, size_t length, int prot,
                             const int * flags);

static struct shim_vma * get_new_vma (void)
{
    struct shim_vma * tmp =
            get_mem_obj_from_mgr_enlarge(vma_mgr, size_align_up(VMA_MGR_ALLOC));
    if (!tmp)
        return NULL;

    memset(tmp, 0, sizeof(struct shim_vma));
    REF_SET(tmp->ref_count, 1);
    return tmp;
}

static bool check_vma_flags (const struct shim_vma * vma, const int * flags)
{
    if (!flags)
        return true;

    if ((vma->flags & VMA_INTERNAL) != ((*flags) & VMA_INTERNAL)) {
        bug();
        return false;
    }

    return true;
}

static inline void __set_comment (struct shim_vma * vma, const char * comment)
{
    if (!comment) {
        vma->comment[0] = 0;
        return;
    }

    int len = strlen(comment);
    if (len > VMA_COMMENT_LEN - 1)
        len = VMA_COMMENT_LEN - 1;

    /* copy at most VMA_COMMENT_LEN - 1 bytes and always nul-terminate */
    memcpy(vma->comment, comment, len);
    vma->comment[len] = 0;
}

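/* __bkeep_mmap: record a mapping of [addr, addr + length).  If the range is
 * already covered by a single vma, that vma is split (via __bkeep_mprotect)
 * and reused; otherwise any vmas starting inside the range are trimmed or
 * removed and a new vma is inserted in sorted order.  Must be called with
 * vma_list_lock held. */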
static int __bkeep_mmap (void * addr, size_t length,
                         int prot, int flags,
                         struct shim_handle * file, int offset,
                         const char * comment)
{
    struct shim_vma * prev = NULL;
    struct shim_vma * tmp = __lookup_supervma(addr, length, &prev);
    int ret = 0;

    if (file)
        get_handle(file);

    if (tmp) { /* the range is included in a vma */
        if (tmp->addr != addr || tmp->length != length) {
            /* we are inside some unmapped area, do a split case */
            ret = __bkeep_mprotect(addr, length, prot, &flags);
            if (ret < 0)
                goto err;
            /* now we get the exact vma handle */
            tmp = __lookup_vma(addr, length);
            assert(tmp);
            assert(check_vma_flags(tmp, &flags));
        }
    } else {
        struct shim_vma * cont = NULL, * n; /* cont: continue to scan vmas */
        struct list_head * pos = NULL; /* pos: position to add the vma */

        if (prev && prev->addr == addr &&
            prev->length <= length) { /* found a vma at the same addr */
            cont = tmp = prev;
        } else { /* need to add a new vma */
            unlock(vma_list_lock);

            if (!(tmp = get_new_vma()))
                return -ENOMEM;

            lock(vma_list_lock);

            if (prev) { /* has a preceding vma */
                if (test_vma_endin(prev, addr, length)) {
                    if (!check_vma_flags(prev, &flags)) {
                        ret = -EACCES;
                        goto err;
                    }

                    /* the preceding vma ends inside the range; otherwise,
                     * there is no overlap.  The other case is handled by the
                     * supervma case above. */
                    prev->length = addr - prev->addr;
                }

                assert(prev->addr + prev->length <= addr);
                cont = prev;
                pos = &prev->list;
            } else { /* has no preceding vma */
                cont = tmp;
                list_add(&tmp->list, &vma_list);
            }
        }

        if (cont)
            list_for_each_entry_safe_continue(cont, n, &vma_list, list) {
                if (!test_vma_startin(cont, addr, length))
                    break;

                if (!check_vma_flags(cont, &flags)) {
                    ret = -EACCES;
                    goto err;
                }

                if (test_vma_endin(cont, addr, length)) {
                    __remove_vma(cont);
                    continue;
                }

                long offset = addr + length - cont->addr;
                assert(offset > 0);
                if (cont->file)
                    cont->offset += offset;
                cont->addr   += offset;
                cont->length -= offset;
                break;
            }

        if (tmp && pos)
            list_add(&tmp->list, pos);
    }

    tmp->addr   = addr;
    tmp->length = length;
    tmp->prot   = prot;
    tmp->flags  = flags|((file && (prot & PROT_WRITE)) ? VMA_TAINTED : 0);
    tmp->file   = file;
    tmp->offset = offset;
    __set_comment(tmp, comment);
    return 0;

err:
    if (file)
        put_handle(file);
    return ret;
}

int bkeep_mmap (void * addr, size_t length, int prot, int flags,
                struct shim_handle * file, int offset,
                const char * comment)
{
    if (!addr || !length)
        return -EINVAL;

    lock(vma_list_lock);
    int ret = __bkeep_mmap(addr, length, prot, flags, file, offset,
                           comment);
    assert_vma();
    unlock(vma_list_lock);

    return ret;
}

/*
 * munmap may start at any address and may cover only part of a vma, so we
 * may need to split the area or shrink its size.  Check whether the address
 * range falls inside an already bookkept area.
 */
static int __bkeep_munmap (void * addr, size_t length, const int * flags)
{
    struct shim_vma * tmp, * n;

    list_for_each_entry_safe(tmp, n, &vma_list, list) {
        if (test_vma_equal (tmp, addr, length)) {
            if (!check_vma_flags(tmp, flags))
                return -EACCES;
            __remove_vma(tmp);
        } else if (test_vma_overlap (tmp, addr, length)) {
            unsigned long before_length;
            unsigned long after_length;
            unsigned long after_offset;

            if (addr > tmp->addr)
                before_length = addr - tmp->addr;
            else
                before_length = 0;

            if (tmp->addr + tmp->length > addr + length)
                after_length = (tmp->addr + tmp->length) - (addr + length);
            else
                after_length = 0;

            after_offset = tmp->file ? tmp->offset + tmp->length -
                           after_length : 0;

            /* split case
             *
             * It is unlikely that a process does a partial unmap, but we
             * handle it by splitting the bookkeeping.
             *
             * Case 1: the unmapped range lies entirely inside a mapped area,
             * e.g.:
             *       ---unmap--
             *   ------map-----------
             */
            if (before_length) {
                /* Case 1: space in the vma before the unmapped range */
                if (!check_vma_flags(tmp, flags))
                    return -EACCES;
                tmp->length = before_length;

                if (after_length) {
                    /* Case 2: space both before and after */
                    int ret = __bkeep_mmap((void *) addr + length, after_length,
                                           tmp->prot, tmp->flags,
                                           tmp->file, after_offset,
                                           tmp->comment);
                    if (ret < 0)
                        return ret;
                }
            } else if (after_length) {
                /* Case 3: only space after the unmapped range */
                if (!check_vma_flags(tmp, flags))
                    return -EACCES;
                tmp->addr   = (void *) addr + length;
                tmp->length = after_length;
                tmp->offset = after_offset;
            } else {
                if (!check_vma_flags(tmp, flags))
                    return -EACCES;
                __remove_vma(tmp);
            }
        } else if (tmp->addr > (addr + length))
            break;
    }

    return 0;
}

int bkeep_munmap (void * addr, size_t length, const int * flags)
{
    if (!addr || !length)
        return -EINVAL;

    lock(vma_list_lock);
    int ret = __bkeep_munmap(addr, length, flags);
    assert_vma();
    unlock(vma_list_lock);

    return ret;
}

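/* __bkeep_mprotect: change the protection of [addr, addr + length).  Handles
 * three cases: an exact vma match, a range contained in a single vma (which
 * is split into up to three pieces), and a range spanning several vmas
 * (handled piecewise in the loop below).  Must be called with vma_list_lock
 * held. */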
static int __bkeep_mprotect (void * addr, size_t length, int prot,
                             const int * flags)
{
    struct shim_vma * tmp = __lookup_vma(addr, length);
    int ret;

    if (tmp) {
        /* exact match */
        if (!check_vma_flags(tmp, flags))
            return -EACCES;
        tmp->prot = prot;
        if (tmp->file && (prot & PROT_WRITE))
            tmp->flags |= VMA_TAINTED;
        return 0;
    }

    /* split case
     *
     * It is unlikely that a process changes protection on part of a mapping,
     * but we handle it by splitting the bookkeeping.
     *
     * Case 1: the affected range lies entirely inside one mapped area, e.g.:
     *       --change--
     *   ------map-----------
     */
    tmp = __lookup_supervma(addr, length, NULL);

    if (tmp) {
        if (!check_vma_flags(tmp, flags))
            return -EACCES;

        int before_length = addr - tmp->addr;
        int after_length  = tmp->addr + tmp->length - addr - length;
        int after_offset  = tmp->file ? tmp->offset + tmp->length -
                            after_length : 0;
        int inside_offset = tmp->file ? tmp->offset + before_length : 0;

        /* shrink this vma first, because we might call __bkeep_mmap */
        tmp->addr = (void *) addr;
        tmp->length = length;

        if (before_length) {
            ret = __bkeep_mmap((void *) addr - before_length, before_length,
                               tmp->prot, tmp->flags,
                               tmp->file, tmp->offset,
                               tmp->comment);
            if (ret < 0)
                return ret;
        }

        if (after_length) {
            ret = __bkeep_mmap((void *)addr + length, after_length,
                               tmp->prot, tmp->flags,
                               tmp->file, after_offset,
                               tmp->comment);
            if (ret < 0)
                return ret;
        }

        tmp->prot = prot;
        tmp->offset = inside_offset;

        if (tmp->file && (prot & PROT_WRITE))
            tmp->flags |= VMA_TAINTED;

        return 0;
    }

    /* split case
     *
     * Case 2: the affected range spans two (or more) mapped areas, so
     * several vmas need to be split.  This is the most unlikely case.
     *
     *     ------change------
     *   ----map1-----;-----map2-------
     *
     * TODO: this algorithm is very inefficient, and may change
     * the mapping if it fails.
     */
    int o_length = length;

    while (length) {
        struct shim_vma * candidate = NULL;

        list_for_each_entry(tmp, &vma_list, list) {
            if (test_vma_contain (tmp, addr, 1)) {
                if (!check_vma_flags(tmp, flags))
                    return -EACCES;

                int before_length = addr - tmp->addr;
                int after_length  = tmp->addr + tmp->length > addr + length ?
                                    tmp->addr + tmp->length - addr - length : 0;
                int after_offset  = tmp->file ? tmp->offset + tmp->length -
                                    after_length : 0;
                int inside_length = tmp->addr + tmp->length > addr + length ?
                                    length :
                                    addr + length - tmp->addr - tmp->length;
                int inside_offset = tmp->file ? tmp->offset + before_length : 0;

                /* shrink this vma first, because we might call __bkeep_mmap */
                tmp->addr = (void *) addr;
                tmp->length = inside_length;

                if (before_length) {
                    ret = __bkeep_mmap((void *) addr - before_length, before_length,
                                       tmp->prot, tmp->flags,
                                       tmp->file, tmp->offset,
                                       tmp->comment);
                    if (ret < 0)
                        return ret;
                }

                if (after_length) {
                    ret = __bkeep_mmap((void *) addr + length, after_length,
                                       tmp->prot, tmp->flags,
                                       tmp->file, after_offset,
                                       tmp->comment);
                    if (ret < 0)
                        return ret;
                }

                tmp->prot = prot;
                tmp->offset = inside_offset;

                if (tmp->file && (prot & PROT_WRITE))
                    tmp->flags |= VMA_TAINTED;

                addr += inside_length;
                length -= inside_length;

                break;
            }

            if (test_vma_startin(tmp, addr, length))
                if (!candidate || candidate->addr > tmp->addr)
                    candidate = tmp;
        }

        if (o_length == length) {
            if (!candidate) {
                /* no more vmas, protect the whole remaining area */
                ret = __bkeep_mmap((void *) addr, length, prot,
                                   VMA_UNMAPPED|(flags ? *flags : 0),
                                   NULL, 0, NULL);
                if (ret < 0)
                    return ret;
                candidate = __lookup_vma((void *) addr, length);
                assert(candidate);
            }

            length -= candidate->addr - addr;
        }

        o_length = length;
    }

    return 0;
}

int bkeep_mprotect (void * addr, size_t length, int prot, const int * flags)
{
    if (!addr || !length)
        return -EINVAL;

    lock(vma_list_lock);
    int ret = __bkeep_mprotect(addr, length, prot, flags);
    assert_vma();
    unlock(vma_list_lock);

    return ret;
}

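/* Pick a randomized heap top between bottom + DEFAULT_HEAP_MIN_SIZE and top,
 * at allocsize granularity, so the heap layout is not fully deterministic.
 * If the minimum heap size does not fit, the top of the range is used. */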
static void __set_heap_top (void * bottom, void * top)
{
    bottom += DEFAULT_HEAP_MIN_SIZE;

    if (bottom >= top) {
        heap_top = top;
        return;
    }

    unsigned long rand;
    while (getrand(&rand, sizeof(unsigned long)) < sizeof(unsigned long));

    rand %= (unsigned long) (top - bottom) / allocsize;
    heap_top = bottom + rand * allocsize;
    debug("heap top adjusted to %p\n", heap_top);
}

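/* get_unmapped_vma: search downward from heap_top for a free region of the
 * given length, bookkeep it as VMA_UNMAPPED, and return its address.  If the
 * search falls below heap_bottom, the heap top is re-randomized and the
 * search restarts; NULL is returned when no space is left. */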
void * get_unmapped_vma (size_t length, int flags)
{
    struct shim_vma * new = get_new_vma(), * tmp = NULL;
    if (!new)
        return NULL;

    lock(vma_list_lock);

    do {
        new->addr   = heap_top - length;
        new->length = length;
        new->flags  = flags|VMA_UNMAPPED;

        list_for_each_entry_reverse(tmp, &vma_list, list) {
            if (new->addr >= tmp->addr + tmp->length)
                break;

            if (new->addr < heap_bottom)
                break;

            if (new->addr > tmp->addr - length)
                new->addr = tmp->addr - length;
        }

        if (new->addr < heap_bottom) {
            if (heap_top == PAL_CB(user_address.end)) {
                unlock(vma_list_lock);
                put_vma(new);
                return NULL;
            } else {
                __set_heap_top(heap_top, (void *) PAL_CB(user_address.end));
                new->addr = NULL;
            }
        }
    } while (!new->addr);

    get_vma(new);
    list_add(&new->list, tmp ? &tmp->list : &vma_list);
    unlock(vma_list_lock);
    return new->addr;
}

/* This might not return an exact match; the caller may need to split the
 * vma it finds. */
static struct shim_vma * __lookup_overlap_vma (const void * addr, size_t length,
                                               struct shim_vma ** pprev)
{
    struct shim_vma * tmp, * prev = NULL;

    list_for_each_entry(tmp, &vma_list, list) {
        if (test_vma_overlap (tmp, addr, length)) {
            if (pprev)
                *pprev = prev;
            return tmp;
        }

        /* Assert we are really sorted */
        assert(!prev || prev->addr < tmp->addr);
        /* Insert in order; break once we are past the appropriate point */
        if (tmp->addr > addr)
            break;
        prev = tmp;
    }

    if (pprev)
        *pprev = prev;
    return NULL;
}

int lookup_overlap_vma (const void * addr, size_t length,
                        struct shim_vma ** vma)
{
    struct shim_vma * tmp = NULL;

    lock(vma_list_lock);

    if ((tmp = __lookup_overlap_vma(addr, length, NULL)) && vma)
        get_vma((tmp));

    unlock(vma_list_lock);

    if (vma)
        *vma = tmp;

    return tmp ? 0 : -ENOENT;
}

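/* __lookup_vma: find the vma that matches [addr, addr + length) exactly.
 * Must be called with vma_list_lock held. */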
static struct shim_vma * __lookup_vma (const void * addr, size_t length)
{
    struct shim_vma * tmp;
    struct shim_vma * prev __attribute__((unused)) = NULL;

    list_for_each_entry(tmp, &vma_list, list) {
        if (test_vma_equal(tmp, addr, length))
            return tmp;

        /* Assert we are really sorted */
        assert(!prev || prev->addr + prev->length <= tmp->addr);
        prev = tmp;
    }

    return NULL;
}

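/* __lookup_supervma: find the vma that fully contains [addr, addr + length),
 * optionally reporting the preceding vma so a new entry can be inserted in
 * order.  Must be called with vma_list_lock held. */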
static struct shim_vma * __lookup_supervma (const void * addr, size_t length,
                                            struct shim_vma ** pprev)
{
    struct shim_vma * tmp, * prev = NULL;

    list_for_each_entry(tmp, &vma_list, list) {
        if (test_vma_contain(tmp, addr, length)) {
            if (pprev)
                *pprev = prev;
            return tmp;
        }

        /* Assert we are really sorted */
        assert(!prev || prev->addr + prev->length <= tmp->addr);
        /* Insert in order; break once we are past the appropriate point */
        if (tmp->addr > addr)
            break;
        prev = tmp;
    }

    if (pprev)
        *pprev = prev;
    return NULL;
}

int lookup_supervma (const void * addr, size_t length, struct shim_vma ** vma)
{
    struct shim_vma * tmp = NULL;

    lock(vma_list_lock);

    if ((tmp = __lookup_supervma(addr, length, NULL)) && vma)
        get_vma((tmp));

    unlock(vma_list_lock);

    if (vma)
        *vma = tmp;

    return tmp ? 0 : -ENOENT;
}

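/* next_vma: return the vma following the given one in address order, or the
 * first vma if the argument is NULL.  A reference is taken on the returned
 * vma and the reference on the argument is dropped, so the function can be
 * used to walk the list. */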
struct shim_vma * next_vma (struct shim_vma * vma)
{
    struct shim_vma * tmp = vma;

    lock(vma_list_lock);

    if (!tmp) {
        if (!list_empty(&vma_list) &&
            (tmp = list_first_entry(&vma_list, struct shim_vma, list)))
            get_vma(tmp);

        unlock(vma_list_lock);
        return tmp;
    }

    if (tmp->list.next == &vma_list) {
        tmp = NULL;
    } else if (tmp->list.next == &tmp->list) {
        struct shim_vma * tmp2;
        tmp = NULL;
        list_for_each_entry(tmp2, &vma_list, list)
            if (tmp2->addr >= vma->addr) {
                tmp = tmp2;
                get_vma(tmp);
                break;
            }
    } else {
        tmp = list_entry(tmp->list.next, struct shim_vma, list);
        get_vma(tmp);
    }

    put_vma(vma);
    unlock(vma_list_lock);
    return tmp;
}

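/* dump_all_vmas: render the bookkept vmas into buf in a format resembling
 * /proc/[pid]/maps.  Internal or unmapped vmas without a comment are
 * skipped.  Returns the number of bytes written, or -EOVERFLOW if the buffer
 * is too small. */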
int dump_all_vmas (struct shim_thread * thread, char * buf, size_t size)
{
    lock(vma_list_lock);

    struct shim_vma * vma;
    int cnt = 0;

    list_for_each_entry(vma, &vma_list, list) {
        void * start = vma->addr, * end = vma->addr + vma->length;

        if ((vma->flags & (VMA_INTERNAL|VMA_UNMAPPED)) && !vma->comment[0])
            continue;

        char prot[3] = {'-', '-', '-'};
        if (vma->prot & PROT_READ)
            prot[0] = 'r';
        if (vma->prot & PROT_WRITE)
            prot[1] = 'w';
        if (vma->prot & PROT_EXEC)
            prot[2] = 'x';

        if (vma->file) {
            int dev_major = 0, dev_minor = 0;
            unsigned long ino = vma->file->dentry ? vma->file->dentry->ino : 0;
            const char * name = "[unknown]";

            if (!qstrempty(&vma->file->path))
                name = qstrgetstr(&vma->file->path);

            cnt += snprintf(buf + cnt, size - cnt,
                            start > (void *) 0xffffffff ? "%lx" : "%08x", start);

            cnt += snprintf(buf + cnt, size - cnt,
                            end > (void *) 0xffffffff ? "-%lx" : "-%08x", end);

            cnt += snprintf(buf + cnt, size - cnt,
                            " %c%c%cp %08x %02d:%02d %u %s\n",
                            prot[0], prot[1], prot[2],
                            vma->offset, dev_major, dev_minor, ino,
                            name);
        } else {
            cnt += snprintf(buf + cnt, size - cnt,
                            start > (void *) 0xffffffff ? "%lx" : "%08x", start);

            cnt += snprintf(buf + cnt, size - cnt,
                            end > (void *) 0xffffffff ? "-%lx" : "-%08x", end);

            if (vma->comment[0])
                cnt += snprintf(buf + cnt, size - cnt,
                                " %c%c%cp 00000000 00:00 0 [%s]\n",
                                prot[0], prot[1], prot[2], vma->comment);
            else
                cnt += snprintf(buf + cnt, size - cnt,
                                " %c%c%cp 00000000 00:00 0\n",
                                prot[0], prot[1], prot[2]);
        }

        if (cnt >= size) {
            cnt = -EOVERFLOW;
            break;
        }
    }

    unlock(vma_list_lock);
    return cnt;
}

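/* unmap_all_vmas: release all bookkept memory except internal vmas and the
 * current thread's stack, coalescing adjacent regions into single
 * DkVirtualMemoryFree calls. */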
void unmap_all_vmas (void)
{
    struct shim_thread * cur_thread = get_cur_thread();
    struct shim_vma * tmp, * n;
    void * start = NULL, * end = NULL;

    lock(vma_list_lock);

    list_for_each_entry_safe(tmp, n, &vma_list, list) {
        /* an internal vma is never removed */
        if (tmp->flags & VMA_INTERNAL)
            continue;

        if (tmp->flags & VMA_UNMAPPED) {
            __remove_vma(tmp);
            continue;
        }

        if (cur_thread->stack &&
            test_vma_overlap(tmp, cur_thread->stack,
                             cur_thread->stack_top - cur_thread->stack))
            continue;

        if (start == NULL)
            start = end = tmp->addr;

        if (end == tmp->addr) {
            end += tmp->length;
            __remove_vma(tmp);
            continue;
        }

        debug("removing vma %p - %p\n", start, end);
        DkVirtualMemoryFree(start, end - start);

        start = end = tmp->addr;
        end += tmp->length;
        __remove_vma(tmp);
    }

    if (start != NULL && start < end) {
        debug("removing vma %p - %p\n", start, end);
        DkVirtualMemoryFree(start, end - start);
    }

    unlock(vma_list_lock);
}

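/* Checkpoint/migration support: the migrate function below serializes one
 * vma (and, when recursive, its file handle and backing memory) into the
 * checkpoint store; the resume function re-inserts the vma into the local
 * bookkeeping and restores its memory. */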
DEFINE_MIGRATE_FUNC(vma)

MIGRATE_FUNC_BODY(vma)
{
    assert(size == sizeof(struct shim_vma));
    struct shim_vma * vma = (struct shim_vma *) obj;
    struct shim_vma * new_vma = NULL;
    struct shim_handle * file = NULL;
    PAL_FLG pal_prot = PAL_PROT(vma->prot, 0);

    if (vma->file && recursive)
        __DO_MIGRATE(handle, vma->file, &file, 1);

    unsigned long off = ADD_TO_MIGRATE_MAP(obj, *offset, size);

    if (ENTRY_JUST_CREATED(off)) {
        off = ADD_OFFSET(sizeof(struct shim_vma));

        if (!dry) {
            new_vma = (struct shim_vma *) (base + off);
            memcpy(new_vma, vma, sizeof(struct shim_vma));
            new_vma->file = file;
            new_vma->received = 0;
            REF_SET(new_vma->ref_count, 0);
            INIT_LIST_HEAD(&new_vma->list);
        }

        if (recursive && NEED_MIGRATE_MEMORY(vma)) {
            void * send_addr = vma->addr;
            size_t send_size = vma->length;

            if (vma->file) {
                size_t file_len = get_file_size(vma->file);
                if (file_len >= 0 &&
                    vma->offset + vma->length > file_len)
                    send_size = file_len > vma->offset ?
                                file_len - vma->offset : 0;
            }

            if (send_size) {
                bool protected = false;

                if (store->use_gipc) {
#if HASH_GIPC == 1
                    /* pages must be readable to be hashed; restore the
                     * original protection below */
                    if (!dry && !(pal_prot & PAL_PROT_READ)) {
                        protected = true;
                        DkVirtualMemoryProtect(send_addr, send_size,
                                               pal_prot|PAL_PROT_READ);
                    }
#endif /* HASH_GIPC == 1 */
                    struct shim_gipc_entry * gipc;
                    DO_MIGRATE_SIZE(gipc, send_addr, send_size, &gipc, false);
                    if (!dry) {
                        gipc->prot = vma->prot;
                        gipc->vma  = new_vma;
                    }
#if HASH_GIPC == 1
                    if (protected)
                        DkVirtualMemoryProtect(send_addr, send_size, pal_prot);
#endif /* HASH_GIPC == 1 */
                } else {
                    if (!dry && !(vma->prot & PROT_READ)) {
                        protected = true;
                        DkVirtualMemoryProtect(send_addr, send_size,
                                               pal_prot|PAL_PROT_READ);
                    }

                    struct shim_mem_entry * mem;
                    DO_MIGRATE_SIZE(memory, send_addr, send_size, &mem, false);
                    if (!dry) {
                        mem->prot = vma->prot;
                        mem->vma  = new_vma;
                        mem->need_alloc = true;
                    }

                    if (protected)
                        DkVirtualMemoryProtect(send_addr, send_size, pal_prot);
                }
            }
        }

        ADD_FUNC_ENTRY(off);
        ADD_ENTRY(SIZE, sizeof(struct shim_vma));
    } else if (!dry)
        new_vma = (struct shim_vma *) (base + off);

    if (new_vma && objp)
        *objp = (void *) new_vma;
}
END_MIGRATE_FUNC

RESUME_FUNC_BODY(vma)
{
    unsigned long off = GET_FUNC_ENTRY();
    assert((size_t) GET_ENTRY(SIZE) == sizeof(struct shim_vma));
    struct shim_vma * vma = (struct shim_vma *) (base + off);
    struct shim_vma * tmp, * prev = NULL;
    int ret = 0;

    RESUME_REBASE(vma->file);
    RESUME_REBASE(vma->list);

    lock(vma_list_lock);

    tmp = __lookup_overlap_vma(vma->addr, vma->length, &prev);

    if (tmp) {
        if ((ret = __bkeep_munmap(vma->addr, vma->length, &vma->flags)) < 0) {
            unlock(vma_list_lock);
            return ret;
        }

        if (prev->list.next == &tmp->list &&
            tmp->addr < vma->addr)
            prev = tmp;
    }

    get_vma(vma);
    list_add(&vma->list, prev ? &prev->list : &vma_list);
    assert_vma();
    unlock(vma_list_lock);

    int allocated = vma->received;

    if (vma->flags & VMA_UNMAPPED)
#ifdef DEBUG_RESUME
        goto no_map;
#else
        return 0;
#endif

    if (vma->file)
        get_handle(vma->file);

    if (allocated < vma->length && vma->file) {
        /* first try the file system's mmap callback, which also forces the
         * PAL handle to be resumed */
        assert(vma->file->fs && vma->file->fs->fs_ops &&
               vma->file->fs->fs_ops->mmap);

        void * addr = vma->addr + allocated;
        int ret = vma->file->fs->fs_ops->mmap(vma->file, &addr,
                                              vma->length - allocated,
                                              vma->prot|PAL_PROT_WRITECOPY,
                                              vma->flags,
                                              vma->offset + allocated);
        if (ret < 0)
            return ret;
        if (!addr)
            return -ENOMEM;
        if (addr != vma->addr + allocated)
            return -EACCES;

        allocated = vma->length;
    }

    if (allocated < vma->length) {
        int pal_alloc_type = ((vma->flags & MAP_32BIT) ? PAL_ALLOC_32BIT : 0);
        int pal_prot = vma->prot;
        if (DkVirtualMemoryAlloc(vma->addr + allocated, vma->length - allocated,
                                 pal_alloc_type, pal_prot))
            allocated = vma->length;
    }

    if (allocated < vma->length)
        debug("vma %p-%p cannot be allocated!\n", vma->addr + allocated,
              vma->addr + vma->length);

    vma->received = allocated;

#ifdef DEBUG_RESUME
    if (vma->file) {
        const char * type = "", * name = "";

        if (!qstrempty(&vma->file->path)) {
            type = ",path=";
            name = qstrgetstr(&vma->file->path);
        } else if (!qstrempty(&vma->file->uri)) {
            type = ",uri=";
            name = qstrgetstr(&vma->file->uri);
        }

        debug("vma: %p-%p,size=%d,prot=%08x,flags=%08x,offset=%d%s%s\n",
              vma->addr, vma->addr + vma->length, vma->length,
              vma->prot, vma->flags, vma->offset, type, name);
    } else {
no_map:
        debug("vma: %p-%p,size=%d,prot=%08x,flags=%08x,offset=%d\n",
              vma->addr, vma->addr + vma->length, vma->length,
              vma->prot, vma->flags, vma->offset);
    }
#endif /* DEBUG_RESUME */
}
END_RESUME_FUNC

DEFINE_MIGRATE_FUNC(all_vmas)

MIGRATE_FUNC_BODY(all_vmas)
{
    lock(vma_list_lock);

    if (!list_empty(&vma_list)) {
        struct shim_vma * tmp =
                list_first_entry(&vma_list, struct shim_vma, list);

        while (tmp) {
            if (tmp->flags & VMA_INTERNAL)
                goto next;

            get_vma(tmp);
            unlock(vma_list_lock);

            DO_MIGRATE(vma, tmp, NULL, recursive);

            lock(vma_list_lock);
            put_vma(tmp);

next:
            if (tmp->list.next == &vma_list)
                break;

            tmp = list_entry(tmp->list.next, struct shim_vma, list);
        }
    }

    unlock(vma_list_lock);
}
END_MIGRATE_FUNC

RESUME_FUNC_BODY(all_vmas)
{
    /* useless */
}
END_RESUME_FUNC

void debug_print_vma_list (void)
{
    sys_printf("vma bookkeeping:\n");

    struct shim_vma * vma;
    list_for_each_entry(vma, &vma_list, list) {
        const char * type = "", * name = "";

        if (vma->file) {
            if (!qstrempty(&vma->file->path)) {
                type = " path=";
                name = qstrgetstr(&vma->file->path);
            } else if (!qstrempty(&vma->file->uri)) {
                type = " uri=";
                name = qstrgetstr(&vma->file->uri);
            }
        }

        sys_printf("[%p-%p] prot=%08x flags=%08x%s%s offset=%d%s%s%s%s\n",
                   vma->addr, vma->addr + vma->length,
                   vma->prot,
                   vma->flags & ~(VMA_INTERNAL|VMA_UNMAPPED|VMA_TAINTED),
                   type, name,
                   vma->offset,
                   vma->flags & VMA_INTERNAL ? " (internal)" : "",
                   vma->flags & VMA_UNMAPPED ? " (unmapped)" : "",
                   vma->comment[0] ? " comment=" : "",
                   vma->comment[0] ? vma->comment : "");
    }
}

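/* print_vma_hash: compute an MD5 digest of each page in the given range of
 * the vma (temporarily making the pages readable if force_protect is set).
 * Note that the per-page hash is computed but not currently printed. */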
void print_vma_hash (struct shim_vma * vma, void * addr, int len,
                     bool force_protect)
{
    if (!addr)
        addr = vma->addr;
    if (!len)
        len = vma->length - (addr - vma->addr);

    if (addr < vma->addr || addr + len > vma->addr + vma->length)
        return;

    if (!(vma->prot & PROT_READ)) {
        if (!force_protect)
            return;
        DkVirtualMemoryProtect(vma->addr, vma->length, PAL_PROT_READ);
    }

    for (unsigned long p = (unsigned long) addr ;
         p < (unsigned long) addr + len ; p += allocsize) {
        unsigned long hash = 0;
        struct shim_md5_ctx ctx;
        md5_init(&ctx);
        md5_update(&ctx, (void *) p, allocsize);
        md5_final(&ctx);
        memcpy(&hash, ctx.digest, sizeof(unsigned long));
    }

    if (!(vma->prot & PROT_READ))
        DkVirtualMemoryProtect(vma->addr, vma->length, vma->prot);
}