/* graphene-ipc.c */

#include <linux/bitmap.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmu_notifier.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#include <linux/sched/signal.h>
#endif
#include <asm/mman.h>
#include <asm/tlb.h>

#include "graphene-ipc.h"
#include "ksyms.h"

MODULE_LICENSE("Dual BSD/GPL");
#define FILE_POISON LIST_POISON1

struct kmem_cache* gipc_queue_cachep;
struct kmem_cache* gipc_send_buffer_cachep;

#define GIPC_DEBUG 0

#if defined(GIPC_DEBUG) && GIPC_DEBUG == 1
#define DEBUG(...)        printk(KERN_INFO __VA_ARGS__)
#define GIPC_BUG_ON(cond) BUG_ON(cond)
#else
#define DEBUG(...)
#define GIPC_BUG_ON(cond)
#endif
#if defined(CONFIG_GRAPHENE_BULK_IPC) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff)                                \
    ({                                                                                    \
        unsigned long populate;                                                           \
        unsigned long rv = do_mmap_pgoff(file, addr, len, prot, flags, pgoff, &populate); \
        rv;                                                                               \
    })
#else
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff) \
    do_mmap_pgoff(file, addr, len, prot, flags, pgoff)
#endif /* kernel_version < 3.9.0 */
#else
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#define MY_DO_MMAP
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff)                           \
    ({                                                                               \
        unsigned long populate;                                                      \
        unsigned long rv;                                                            \
        rv = KSYM(do_mmap)(file, addr, len, prot, flags, 0, pgoff, &populate, NULL); \
        rv;                                                                          \
    })
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#define MY_DO_MMAP
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff)                     \
    ({                                                                         \
        unsigned long populate;                                                \
        unsigned long rv;                                                      \
        rv = KSYM(do_mmap)(file, addr, len, prot, flags, 0, pgoff, &populate); \
        rv;                                                                    \
    })
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
#define MY_DO_MMAP_PGOFF
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff)                        \
    ({                                                                            \
        unsigned long populate;                                                   \
        unsigned long rv;                                                         \
        rv = KSYM(do_mmap_pgoff)(file, addr, len, prot, flags, pgoff, &populate); \
        rv;                                                                       \
    })
#else
#define MY_DO_MMAP_PGOFF
#define DO_MMAP_PGOFF(file, addr, len, prot, flags, pgoff) \
    KSYM(do_mmap_pgoff)(file, addr, len, prot, flags, pgoff)
#endif /* kernel version < 3.9 */
#endif /* !CONFIG_GRAPHENE_BULK_IPC && kernel version > 3.4.0 */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
#ifdef CONFIG_GRAPHENE_BULK_IPC
#define FLUSH_TLB_MM_RANGE flush_tlb_mm_range
#else
#define MY_FLUSH_TLB_MM_RANGE
#define FLUSH_TLB_MM_RANGE KSYM(flush_tlb_mm_range)
#endif
#else /* LINUX_VERSION_CODE < 3.7.0 */
#if defined(CONFIG_GRAPHENE_BULK_IPC) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 2, 0)
#define FLUSH_TLB_PAGE flush_tlb_page
#else
#define MY_FLUSH_TLB_PAGE
#define FLUSH_TLB_PAGE KSYM(flush_tlb_page)
#endif
#endif

#ifdef MY_DO_MMAP
IMPORT_KSYM(do_mmap);
#endif
#ifdef MY_DO_MMAP_PGOFF
IMPORT_KSYM(do_mmap_pgoff);
#endif
#ifdef MY_FLUSH_TLB_MM_RANGE
IMPORT_KSYM(flush_tlb_mm_range);
#endif
#ifdef MY_FLUSH_TLB_PAGE
IMPORT_KSYM(flush_tlb_page);
#endif
#ifndef gipc_get_session
u64 (*my_gipc_get_session)(struct task_struct*) = NULL;
#endif
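/*
 * A gipc_queue is one bulk-IPC channel: a fixed ring of PAGE_QUEUE slots,
 * each holding a pinned page plus the backing file/offset (if any).  `next'
 * and `last' are the consumer/producer indices; senders block on `send' when
 * the ring is full and receivers block on `recv' when it is empty.  `count'
 * is a reference count shared by every fd attached to the channel.
 */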
struct gipc_queue {
    struct list_head list;
    s64 token;
    u64 owner;
    atomic_t count;

    struct mutex send_lock, recv_lock;
    wait_queue_head_t send, recv;
    volatile int next, last;

    struct {
        struct page* page;
        struct file* file;
        u64 pgoff;
    } pages[PAGE_QUEUE];
};

struct gipc_send_buffer {
    unsigned long page_bit_map[PAGE_BITS];
    struct page* pages[PAGE_QUEUE];
    struct vm_area_struct* vmas[PAGE_QUEUE];
    struct file* files[PAGE_QUEUE];
    unsigned long pgoffs[PAGE_QUEUE];
};
struct {
    spinlock_t lock;
    /*
     * For now, just make them monotonically increasing.  XXX: At
     * some point, do something smarter for security.
     */
    u64 max_token;
    struct list_head channels; // gipc_queue structs
} gdev;

#ifdef gipc_get_session
#define GIPC_OWNER gipc_get_session(current)
#else
#define GIPC_OWNER (my_gipc_get_session ? my_gipc_get_session(current) : 0)
#endif
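/*
 * Allocate a fresh channel for `creator', take the initial reference, and
 * publish it on gdev.channels under gdev.lock with the next token value.
 */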
static inline struct gipc_queue* create_gipc_queue(struct file* creator) {
    struct gipc_queue* gq = kmem_cache_alloc(gipc_queue_cachep, GFP_KERNEL);

    if (!gq)
        return gq;

    memset(gq, 0, sizeof(*gq));
    INIT_LIST_HEAD(&gq->list);
    mutex_init(&gq->send_lock);
    mutex_init(&gq->recv_lock);
    init_waitqueue_head(&gq->send);
    init_waitqueue_head(&gq->recv);
    gq->owner = GIPC_OWNER;
    creator->private_data = gq;
    atomic_set(&gq->count, 1);

    spin_lock(&gdev.lock);
    list_add(&gq->list, &gdev.channels);
    gq->token = gdev.max_token++;
    spin_unlock(&gdev.lock);

    return gq;
}
static inline void release_gipc_queue(struct gipc_queue* gq, bool locked) {
    int idx;

    if (!atomic_dec_and_test(&gq->count))
        return;

    if (!locked)
        spin_lock(&gdev.lock);

    while (gq->next != gq->last) {
        idx = gq->next;
        if (gq->pages[idx].page) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
            put_page(gq->pages[idx].page);
#else
            page_cache_release(gq->pages[idx].page);
#endif
            gq->pages[idx].page = NULL;
        }
        if (gq->pages[idx].file) {
            fput_atomic(gq->pages[idx].file);
            gq->pages[idx].file  = NULL;
            gq->pages[idx].pgoff = 0;
        }
        gq->next++;
        gq->next &= (PAGE_QUEUE - 1);
    }

    list_del(&gq->list);

    if (!locked)
        spin_unlock(&gdev.lock);

    kmem_cache_free(gipc_queue_cachep, gq);
}
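/*
 * Mirror of the kernel's fast RSS accounting: when SPLIT_RSS_COUNTING is in
 * effect and the mm belongs to the current task, update the task-local
 * counter; otherwise fall back to add_mm_counter().
 */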
#if defined(SPLIT_RSS_COUNTING)
static void add_mm_counter_fast(struct mm_struct* mm, int member, int val) {
    struct task_struct* task = current;
    if (likely(task->mm == mm))
        task->rss_stat.count[member] += val;
    else
        add_mm_counter(mm, member, val);
}
#else
#define add_mm_counter_fast(mm, member, val) add_mm_counter(mm, member, val)
#endif

#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
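/*
 * Walk the page tables for `addr' (pgd -> p4d -> pud -> pmd -> pte on 4.11+)
 * and write-protect the PTE under its lock, so the next write to the mapping
 * takes a fault and copies the page.  Returns -EFAULT if no page is mapped.
 */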
inline int make_page_cow(struct mm_struct* mm, struct vm_area_struct* vma, unsigned long addr) {
    pgd_t* pgd;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
    p4d_t* p4d;
#endif
    pud_t* pud;
    pmd_t* pmd;
    pte_t* pte;
    spinlock_t* ptl;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
        goto no_page;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
    p4d = p4d_offset(pgd, addr);
    if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
        goto no_page;

    pud = pud_offset(p4d, addr);
#else
    pud = pud_offset(pgd, addr);
#endif
    if (pud_none(*pud) || unlikely(pud_bad(*pud)))
        goto no_page;

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
        goto no_page;

    BUG_ON(pmd_trans_huge(*pmd));

    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    if (!pte_present(*pte)) {
        spin_unlock(ptl);
        goto no_page;
    }

    ptep_set_wrprotect(mm, addr, pte);
    spin_unlock(ptl);
    DEBUG("make page COW at %lx\n", addr);
    return 0;

no_page:
    return -EFAULT;
}
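/*
 * For each page in [addr, addr + nr_pages * PAGE_SIZE), set the matching bit
 * in page_bit_map if a page (or a transparent huge PMD) is already mapped
 * there, and clear it otherwise.  get_pages() uses the bitmap to decide which
 * ranges to pin and which to treat as holes.
 */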
static void fill_page_bit_map(struct mm_struct* mm, unsigned long addr, unsigned long nr_pages,
                              unsigned long page_bit_map[PAGE_BITS]) {
    int i = 0;

    DEBUG("GIPC_SEND fill_page_bit_map %lx - %lx\n", addr, addr + (nr_pages << PAGE_SHIFT));

    do {
        struct vm_area_struct* vma;
        pgd_t* pgd;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
        p4d_t* p4d;
#endif
        pud_t* pud;
        pmd_t* pmd;
        pte_t* pte;
        spinlock_t* ptl;
        bool has_page = false;

        vma = find_vma(mm, addr);
        if (!vma)
            goto next;

        BUG_ON(vma->vm_flags & VM_HUGETLB);

        pgd = pgd_offset(mm, addr);
        if (pgd_none(*pgd) || pgd_bad(*pgd))
            goto next;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
            goto next;

        pud = pud_offset(p4d, addr);
#else
        pud = pud_offset(pgd, addr);
#endif
        if (pud_none(*pud) || pud_bad(*pud))
            goto next;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
            goto next;

        if (unlikely(pmd_trans_huge(*pmd))) {
            has_page = true;
            goto next;
        }

        if (pmd_bad(*pmd))
            goto next;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        if (pte_none(*pte))
            goto next_locked;
#if 0
        if (unlikely(!pte_present(*pte)) && pte_file(*pte))
            goto next_locked;
#endif
        has_page = true;

next_locked:
        spin_unlock(ptl);

next:
        if (has_page) {
            DEBUG("found a page at %lx\n", addr);
            set_bit(i, page_bit_map);
        } else {
            clear_bit(i, page_bit_map);
        }
    } while (i++, addr += PAGE_SIZE, i < nr_pages);
}
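/*
 * Pin the pages selected by page_bit_map with get_user_pages() and mark each
 * source mapping copy-on-write, flushing the TLB for the affected range.
 * Holes (cleared bits) get a NULL page and whatever vma covers the address,
 * if any.  Returns the number of page slots processed or a negative errno.
 */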
static int get_pages(struct task_struct* task, unsigned long start, unsigned long nr_pages,
                     unsigned long page_bit_map[PAGE_BITS], struct page* pages[PAGE_QUEUE],
                     struct vm_area_struct* vmas[PAGE_QUEUE]) {
    struct mm_struct* mm = task->mm;
    struct vm_area_struct* vma = NULL;
    unsigned long addr = start, nr;
    int i = 0, j, rv;

    while (i < nr_pages) {
        unsigned long flushed, vmflags;
        int last = i;

        if (test_bit(last, page_bit_map)) {
            i = find_next_zero_bit(page_bit_map, PAGE_QUEUE, last + 1);
            if (i > nr_pages)
                i = nr_pages;
            nr = i - last;

            DEBUG("GIPC_SEND get_user_pages %ld pages at %lx\n", nr, addr);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
            rv = get_user_pages(addr, nr, FOLL_GET | FOLL_FORCE | FOLL_SPLIT, pages + last,
                                vmas + last);
#else
            rv = __get_user_pages(task, mm, addr, nr, FOLL_GET | FOLL_FORCE | FOLL_SPLIT,
                                  pages + last, vmas + last, NULL);
#endif
            if (rv <= 0) {
                printk(KERN_ERR "Graphene error: get_user_pages at 0x%016lx-0x%016lx\n",
                       addr, addr + (nr << PAGE_SHIFT));
                return rv;
            }

            if (rv != nr) {
                printk(KERN_ERR "Graphene error: get_user_pages at 0x%016lx\n",
                       addr + (rv << PAGE_SHIFT));
                return -EACCES;
            }

            flushed = addr;
            vmflags = 0;

            for (j = 0; j < nr; j++) {
                unsigned long target = addr + (j << PAGE_SHIFT);

                /* Mark source COW */
                rv = make_page_cow(mm, vmas[last + j], target);
                if (rv)
                    return rv;

                if (PageAnon(pages[last + j])) {
                    /* Fix up the counters */
                    inc_mm_counter_fast(mm, MM_FILEPAGES);
                    dec_mm_counter_fast(mm, MM_ANONPAGES);
                    pages[last + j]->mapping = NULL;
                }

#ifdef FLUSH_TLB_MM_RANGE
                if (vmflags == vmas[last + j]->vm_flags)
                    continue;
                if (flushed < target)
                    FLUSH_TLB_MM_RANGE(mm, flushed, target, vmflags);
                flushed = target;
                vmflags = vmas[last + j]->vm_flags;
#else
                FLUSH_TLB_PAGE(vmas[last + j], target);
#endif
            }

#ifdef FLUSH_TLB_MM_RANGE
            if (flushed < addr + (nr << PAGE_SHIFT))
                FLUSH_TLB_MM_RANGE(mm, flushed, addr + (nr << PAGE_SHIFT), vmflags);
#endif

            vma = vmas[i - 1];
            addr += nr << PAGE_SHIFT;
        } else {
            /* This is the case where a page (or pages) are not
             * currently mapped.
             * Handle the hole appropriately. */
            i = find_next_bit(page_bit_map, PAGE_QUEUE, last + 1);
            if (i > nr_pages)
                i = nr_pages;
            nr = i - last;

            DEBUG("GIPC_SEND skip %ld pages at %lx\n", nr, addr);

            for (j = 0; j < nr; j++) {
                if (!vma) {
                    vma = find_vma(mm, addr);
                } else {
                    /* DEP 6/17/13 - these addresses should
                     * be monotonically increasing. */
                    for (; vma && addr >= vma->vm_end; vma = vma->vm_next)
                        ;

                    /* Leverage monotonic increasing vmas
                     * to more quickly detect holes in the
                     * address space. */
                    if (vma && addr < vma->vm_start)
                        vma = NULL;
                }

                pages[last + j] = NULL;
                vmas[last + j]  = vma;
                addr += PAGE_SIZE;
            }
        }
    }

    return i;
}
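/*
 * One GIPC_SEND entry: read the user's (addr, len) pair, then, in batches of
 * at most PAGE_QUEUE pages, pin the mapped pages (marking the sender's copy
 * copy-on-write) and push page/file/pgoff triples onto the ring.  Blocks when
 * the ring is full and wakes receivers as pages become available.
 */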
static int do_gipc_send(struct task_struct* task, struct gipc_queue* gq,
                        struct gipc_send_buffer* gbuf, unsigned long __user* uaddr,
                        unsigned long __user* ulen, unsigned long* copied_pages) {
    struct mm_struct* mm = task->mm;
    unsigned long addr, len, nr_pages;
    int rv, i;

    DEBUG("GIPC_SEND uaddr = %p, ulen = %p\n", uaddr, ulen);

    rv = copy_from_user(&addr, uaddr, sizeof(unsigned long));
    if (rv) {
        printk(KERN_ALERT "Graphene SEND: bad buffer %p\n", uaddr);
        return -EFAULT;
    }

    rv = copy_from_user(&len, ulen, sizeof(unsigned long));
    if (rv) {
        printk(KERN_ALERT "Graphene SEND: bad buffer %p\n", ulen);
        return -EFAULT;
    }

    if (addr > addr + len) {
        printk(KERN_ALERT
               "Graphene SEND: attempt to send %p - %p by thread %d FAIL: bad argument\n",
               (void*)addr, (void*)(addr + len), task->pid);
        return -EINVAL;
    }

    DEBUG("GIPC_SEND addr = %lx, len = %ld\n", addr, len);

    nr_pages = len >> PAGE_SHIFT;

    if (!access_ok(VERIFY_READ, addr, len)) {
        printk(KERN_ALERT "Graphene SEND: attempt to send %p - %p (%ld pages) by thread %d FAIL: "
               "bad permission\n",
               (void*)addr, (void*)(addr + len), nr_pages, task->pid);
        return -EFAULT;
    }

    DEBUG(" %p - %p (%ld pages) sent by thread %d\n", (void*)addr, (void*)(addr + len), nr_pages,
          task->pid);

    while (nr_pages) {
        unsigned long nr = (nr_pages <= PAGE_QUEUE) ? nr_pages : PAGE_QUEUE;

        /* for each of these addresses - check if
         * demand faulting will be triggered
         * if vma is present, but there is no page
         * present(pmd/pud not present or PTE_PRESENT
         * is off) then get_user_pages will trigger
         * the creation of those */
        down_write(&mm->mmap_sem);

        fill_page_bit_map(mm, addr, nr, gbuf->page_bit_map);

        rv = get_pages(task, addr, nr, gbuf->page_bit_map, gbuf->pages, gbuf->vmas);
        if (rv < 0) {
            up_write(&mm->mmap_sem);
            break;
        }

        for (i = 0; i < nr; i++) {
            BUG_ON((!gbuf->vmas[i]) && (!!gbuf->pages[i]));

            if (gbuf->vmas[i] && gbuf->vmas[i]->vm_file) {
                gbuf->files[i] = get_file(gbuf->vmas[i]->vm_file);
                gbuf->pgoffs[i] =
                    ((addr - gbuf->vmas[i]->vm_start) >> PAGE_SHIFT) + gbuf->vmas[i]->vm_pgoff;
            } else {
                gbuf->files[i]  = NULL;
                gbuf->pgoffs[i] = 0;
            }

            addr += PAGE_SIZE;
        }

        up_write(&mm->mmap_sem);

        for (i = 0; i < nr; i++) {
            /* Put in the pending buffer */
            if (((gq->last + 1) & (PAGE_QUEUE - 1)) == gq->next) {
                /* The blocking condition for send
                 * and recv can't both be true! */
                wake_up_all(&gq->recv);
                wait_event_interruptible(gq->send,
                                         ((gq->last + 1) & (PAGE_QUEUE - 1)) != gq->next);
                if (signal_pending(task)) {
                    rv = -ERESTARTSYS;
                    goto out;
                }
            }

            gq->pages[gq->last].page  = gbuf->pages[i];
            gq->pages[gq->last].file  = gbuf->files[i];
            gq->pages[gq->last].pgoff = gbuf->pgoffs[i];
            gq->last++;
            gq->last &= PAGE_QUEUE - 1;
            (*copied_pages)++;
        }

        wake_up_all(&gq->recv);
        nr_pages -= nr;
    }

out:
    return rv;
}
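/*
 * Block until the ring has something to consume; returns the index of the
 * next slot, or -ERESTARTSYS if the wait was interrupted by a signal.
 */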
static inline int recv_next(struct task_struct* task, struct gipc_queue* gq) {
    if (gq->next == gq->last) {
        /* The blocking condition for send & recv can't both be true */
        wake_up_all(&gq->send);
        wait_event_interruptible(gq->recv, gq->next != gq->last);
        if (signal_pending(task))
            return -ERESTARTSYS;
    }

    return gq->next;
}
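/*
 * One GIPC_RECV entry: dequeue page/file/pgoff triples from the ring and map
 * them into the receiver at `addr' (or wherever mmap places them if addr is
 * 0).  A new mapping is set up with DO_MMAP_PGOFF whenever the current vma
 * cannot take the next page; each inserted page is then marked copy-on-write
 * and the kernel's references to the page and file are dropped.  On success
 * the chosen start address is copied back to *uaddr.
 */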
static int do_gipc_recv(struct task_struct* task, struct gipc_queue* gq,
                        unsigned long __user* uaddr, unsigned long __user* ulen,
                        unsigned long __user* uprot, unsigned long* copied_pages) {
    struct mm_struct* mm = task->mm;
    struct vm_area_struct* vma = NULL;
    unsigned long start, addr, len, nr_pages, prot, pgoff;
    struct page* page = NULL;
    struct file* file = NULL;
    int i = 0, rv;

    rv = copy_from_user(&addr, uaddr, sizeof(unsigned long));
    if (rv) {
        printk(KERN_ALERT "Graphene RECV: bad buffer %p\n", uaddr);
        return -EFAULT;
    }

    rv = copy_from_user(&len, ulen, sizeof(unsigned long));
    if (rv) {
        printk(KERN_ALERT "Graphene RECV: bad buffer %p\n", ulen);
        return -EFAULT;
    }

    rv = copy_from_user(&prot, uprot, sizeof(unsigned long));
    if (rv) {
        printk(KERN_ALERT "Graphene RECV: bad buffer %p\n", uprot);
        return -EFAULT;
    }

    nr_pages = len >> PAGE_SHIFT;
    start    = addr;
    down_write(&mm->mmap_sem);

    while (i < nr_pages) {
        int found    = recv_next(task, gq);
        int need_map = 1;

        if (found < 0) {
            rv = found;
            goto finish;
        }

        page  = gq->pages[found].page;
        file  = gq->pages[found].file;
        pgoff = gq->pages[found].pgoff;
        gq->next++;
        gq->next &= PAGE_QUEUE - 1;

        wake_up_all(&gq->send);

        if (vma) {
            need_map = 0;
            if (vma->vm_file != file)
                need_map = 1;
            if (file && vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT) != addr)
                need_map = 1;
            if (prot != (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
                need_map = 1;
        }

        if (need_map) {
            unsigned long flags = MAP_PRIVATE;

            if (addr)
                flags |= MAP_FIXED;
            if (file)
                flags |= MAP_FILE;
            else
                flags |= MAP_ANONYMOUS;

            addr = DO_MMAP_PGOFF(file, addr, (nr_pages - i) << PAGE_SHIFT, prot, flags, pgoff);

            if (IS_ERR_VALUE(addr)) {
                rv = PTR_ERR((void*)addr);
                printk(KERN_ERR "Graphene error: failed to mmap (%d)\n", -rv);
                goto finish;
            }

            if (file)
                DEBUG("map %08lx-%08lx file %p\n", addr, addr + ((nr_pages - i) << PAGE_SHIFT),
                      file);
            else
                DEBUG("map %08lx-%08lx\n", addr, addr + ((nr_pages - i) << PAGE_SHIFT));

            if (!start)
                start = addr;

            vma = find_vma(mm, addr);
            if (!vma) {
                printk(KERN_ERR "Graphene error: can't find vma at %p\n", (void*)addr);
                rv = -ENOENT;
                goto finish;
            }
        } else {
            BUG_ON(!vma);
        }

        if (page) {
            rv = vm_insert_page(vma, addr, page);
            if (rv) {
                printk(KERN_ERR "Graphene error: fail to insert page %d\n", rv);
                goto finish;
            }
            rv = make_page_cow(mm, vma, addr);
            if (rv) {
                printk(KERN_ERR "Graphene error: can't make vma copy-on-write at %p\n",
                       (void*)addr);
                goto finish;
            }
        }

finish:
        /* Drop the kernel's reference to this page */
        if (page)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
            put_page(page);
#else
            page_cache_release(page);
#endif
        if (file)
            fput_atomic(file);
        if (rv)
            break;

        i++;
        addr += PAGE_SIZE;
        (*copied_pages)++;
    }

    up_write(&mm->mmap_sem);

    if (i)
        DEBUG(" %p - %p (%d pages) received by thread %d\n", (void*)start,
              (void*)start + (i << PAGE_SHIFT), i, task->pid);

    if (start) {
        rv = copy_to_user(uaddr, &start, sizeof(unsigned long));
        if (rv) {
            printk(KERN_ERR "Graphene error: bad buffer %p\n", uaddr);
            return -EFAULT;
        }
    }

    return rv;
}
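/*
 * ioctl interface:
 *   GIPC_CREATE - allocate a new channel and return its token.
 *   GIPC_JOIN   - attach this fd to an existing token (same session only).
 *   GIPC_SEND   - enqueue `entries' address ranges onto the channel.
 *   GIPC_RECV   - map the queued pages into the caller's address space.
 *
 * A rough userspace sketch (field names follow how struct gipc_send and
 * struct gipc_recv are used below; the real declarations live in
 * graphene-ipc.h):
 *
 *   int fd = open("/dev/gipc", O_RDONLY);
 *   long token = ioctl(fd, GIPC_CREATE, 0);
 *   ...
 *   struct gipc_send gs = { .entries = 1, .addr = &addr, .len = &len };
 *   ioctl(fd, GIPC_SEND, &gs);   // returns the number of pages sent
 */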
static long gipc_ioctl(struct file* file, unsigned int cmd, unsigned long arg) {
    struct task_struct* task = current;
    struct gipc_queue* gq = NULL;
    long rv = 0;

    switch (cmd) {
        case GIPC_SEND: {
            struct gipc_send gs;
            struct gipc_send_buffer* gbuf;
            int i;
            unsigned long nr_pages = 0;

            rv = copy_from_user(&gs, (void*)arg, sizeof(gs));
            if (rv) {
                printk(KERN_ALERT "Graphene SEND: bad buffer %p\n", (void*)arg);
                return -EFAULT;
            }

            /* Find/allocate the gipc_pages struct for our recipient */
            gq = (struct gipc_queue*)file->private_data;
            if (!gq)
                return -EFAULT;

            gbuf = kmem_cache_alloc(gipc_send_buffer_cachep, GFP_KERNEL);
            if (!gbuf)
                return -ENOMEM;

            DEBUG("GIPC_SEND %ld entries to token %lld by thread %d\n", gs.entries, gq->token,
                  task->pid);

            mutex_lock(&gq->send_lock);

            for (i = 0; i < gs.entries; i++) {
                rv = do_gipc_send(task, gq, gbuf, gs.addr + i, gs.len + i, &nr_pages);
                if (rv < 0)
                    break;
            }

            mutex_unlock(&gq->send_lock);
            DEBUG("GIPC_SEND return to thread %d, %ld pages are sent\n", task->pid, nr_pages);

            kmem_cache_free(gipc_send_buffer_cachep, gbuf);
            rv = nr_pages ?: rv;
            break;
        }

        case GIPC_RECV: {
            struct gipc_recv gr;
            int i;
            unsigned long nr_pages = 0;

            rv = copy_from_user(&gr, (void*)arg, sizeof(gr));
            if (rv) {
                printk(KERN_ERR "Graphene error: bad buffer %p\n", (void*)arg);
                return -EFAULT;
            }

            gq = (struct gipc_queue*)file->private_data;
            if (!gq)
                return -EBADF;

            DEBUG("GIPC_RECV %ld entries to token %lld by thread %d\n", gr.entries, gq->token,
                  task->pid);

            mutex_lock(&gq->recv_lock);

            for (i = 0; i < gr.entries; i++) {
                rv = do_gipc_recv(task, gq, gr.addr + i, gr.len + i, gr.prot + i, &nr_pages);
                if (rv < 0)
                    break;
            }

            mutex_unlock(&gq->recv_lock);
            DEBUG("GIPC_RECV return to thread %d, %ld pages are received\n", task->pid, nr_pages);

            rv = nr_pages ?: rv;
            break;
        }

        case GIPC_CREATE: {
            gq = create_gipc_queue(file);
            if (!gq) {
                rv = -ENOMEM;
                break;
            }

            DEBUG("GIPC_CREATE token %lld by thread %d\n", gq->token, task->pid);
            rv = gq->token;
            break;
        }

        case GIPC_JOIN: {
            struct gipc_queue* q;
            u64 token   = arg;
            u64 session = GIPC_OWNER;

            if (file->private_data != NULL)
                return -EBUSY;

            /* Search for this token */
            spin_lock(&gdev.lock);
            list_for_each_entry(q, &gdev.channels, list) {
                if (q->token == token) {
                    gq = q;
                    break;
                }
            }

            /* Fail if we didn't find it */
            if (!gq) {
                spin_unlock(&gdev.lock);
                return -ENOENT;
            }

            if (gq->owner != session) {
                spin_unlock(&gdev.lock);
                return -EPERM;
            }

            atomic_inc(&gq->count);
            file->private_data = gq;

            /* Hold the lock until we allocate so only one process
             * gets the queue */
            spin_unlock(&gdev.lock);

            DEBUG("GIPC_JOIN token %lld by thread %d\n", token, task->pid);
            rv = 0;
            break;
        }

        default:
            printk(KERN_ALERT "Graphene unknown ioctl %u %lu\n", cmd, arg);
            rv = -ENOSYS;
            break;
    }

    return rv;
}
static int gipc_release(struct inode* inode, struct file* file) {
    struct gipc_queue* gq = (struct gipc_queue*)file->private_data;

    if (!gq)
        return 0;

    file->private_data = NULL;
    release_gipc_queue(gq, false);
    return 0;
}

static int gipc_open(struct inode* inode, struct file* file) {
    file->private_data = NULL;
    return 0;
}

static struct file_operations gipc_fops = {
    .owner          = THIS_MODULE,
    .release        = gipc_release,
    .open           = gipc_open,
    .unlocked_ioctl = gipc_ioctl,
    .compat_ioctl   = gipc_ioctl,
    .llseek         = noop_llseek,
};
static struct miscdevice gipc_dev = {
    .minor = GIPC_MINOR,
    .name  = "gipc",
    .fops  = &gipc_fops,
    .mode  = 0666,
};
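/*
 * Module init: resolve the unexported kernel symbols called through KSYM()
 * where the MY_* fallbacks are in effect, create the slab caches for queues
 * and send buffers, and register the gipc misc device.
 */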
static int __init gipc_init(void) {
    int rv = 0;

#ifdef MY_DO_MMAP
    LOOKUP_KSYM(do_mmap);
#endif
#ifdef MY_DO_MMAP_PGOFF
    LOOKUP_KSYM(do_mmap_pgoff);
#endif
#ifdef MY_FLUSH_TLB_MM_RANGE
    LOOKUP_KSYM(flush_tlb_mm_range);
#endif
#ifdef MY_FLUSH_TLB_PAGE
    LOOKUP_KSYM(flush_tlb_page);
#endif

#ifndef gipc_get_session
    my_gipc_get_session = (void*)kallsyms_lookup_name("gipc_get_session");
#endif

    /* Register the kmem cache */
    gipc_queue_cachep = kmem_cache_create("gipc_queue", sizeof(struct gipc_queue), 0,
                                          SLAB_HWCACHE_ALIGN |
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
                                          SLAB_TYPESAFE_BY_RCU,
#else
                                          SLAB_DESTROY_BY_RCU,
#endif
                                          NULL);
    if (!gipc_queue_cachep) {
        printk(KERN_ERR "Graphene error: failed to create a gipc queues cache\n");
        return -ENOMEM;
    }

    gipc_send_buffer_cachep =
        kmem_cache_create("gipc_send_buffer", sizeof(struct gipc_send_buffer), 0,
                          SLAB_HWCACHE_ALIGN |
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
                          SLAB_TYPESAFE_BY_RCU,
#else
                          SLAB_DESTROY_BY_RCU,
#endif
                          NULL);
    if (!gipc_send_buffer_cachep) {
        printk(KERN_ERR "Graphene error: failed to create a gipc buffers cache\n");
        kmem_cache_destroy(gipc_queue_cachep);
        return -ENOMEM;
    }

    INIT_LIST_HEAD(&gdev.channels);
    spin_lock_init(&gdev.lock);
    gdev.max_token = 1;

    rv = misc_register(&gipc_dev);
    if (rv) {
        printk(KERN_ERR "Graphene error: failed to add a char device (rv=%d)\n", rv);
        kmem_cache_destroy(gipc_send_buffer_cachep);
        kmem_cache_destroy(gipc_queue_cachep);
        return rv;
    }

    printk(KERN_ALERT "Graphene IPC: Hello, world\n");
    return 0;
}
static void __exit gipc_exit(void) {
    struct gipc_queue *gq, *n;

    spin_lock(&gdev.lock);
    list_for_each_entry_safe(gq, n, &gdev.channels, list) {
        release_gipc_queue(gq, true);
    }
    spin_unlock(&gdev.lock);

    misc_deregister(&gipc_dev);
    kmem_cache_destroy(gipc_send_buffer_cachep);
    kmem_cache_destroy(gipc_queue_cachep);

    printk(KERN_ALERT "Graphene IPC: Goodbye, cruel world\n");
}

module_init(gipc_init);
module_exit(gipc_exit);