shim_semget.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942
  1. /* -*- mode:c; c-file-style:"k&r"; c-basic-offset: 4; tab-width:4; indent-tabs-mode:nil; mode:auto-fill; fill-column:78; -*- */
  2. /* vim: set ts=4 sw=4 et tw=78 fo=cqt wm=0: */
  3. /* Copyright (C) 2014 OSCAR lab, Stony Brook University
  4. This file is part of Graphene Library OS.
  5. Graphene Library OS is free software: you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License
  7. as published by the Free Software Foundation, either version 3 of the
  8. License, or (at your option) any later version.
  9. Graphene Library OS is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. /*
  16. * shim_semget.c
  17. *
  18. * Implementation of the system calls "semget", "semop", "semtimedop" and "semctl".
  19. */
  20. #include <shim_internal.h>
  21. #include <shim_table.h>
  22. #include <shim_utils.h>
  23. #include <shim_handle.h>
  24. #include <shim_ipc.h>
  25. #include <shim_sysv.h>
  26. #include <shim_profile.h>
  27. #include <pal.h>
  28. #include <pal_error.h>
  29. #include <linux_list.h>
  30. #include <errno.h>
  /* Hash-table geometry: 2^SEM_HASH_LEN buckets, selected by the low
   * SEM_HASH_LEN bits of the value being hashed (a key or a semaphore ID). */
  31. #define SEM_HASH_LEN 8
  32. #define SEM_HASH_NUM (1 << SEM_HASH_LEN)
  33. #define SEM_HASH_MASK (SEM_HASH_NUM - 1)
  34. #define SEM_HASH(idx) ((idx) & SEM_HASH_MASK)
  /* Every semaphore handle is linked on sem_list; handles are additionally
   * hashed by semkey (sem_key_hlist) and by semid (sem_sid_hlist). */
  35. static LIST_HEAD(sem_list);
  36. static struct hlist_head sem_key_hlist [SEM_HASH_NUM];
  37. static struct hlist_head sem_sid_hlist [SEM_HASH_NUM];
  /* Taken around lookups in, insertions into and removals from the list and
   * the two hash tables above. */
  38. static LOCKTYPE sem_list_lock;
  39. DEFINE_PROFILE_CATAGORY(sysv_sem, );
  /* Map an embedded shim_sem_handle back to the shim_handle containing it
   * (the semaphore state lives in the handle's info.sem member). */
  40. #define SEM_TO_HANDLE(semhdl) \
  41. container_of((semhdl), struct shim_handle, info.sem)
  42. static int __add_sem_handle (unsigned long key, IDTYPE semid,
  43. int nsems, bool owned,
  44. struct shim_sem_handle ** semhdl)
  45. {
  46. struct hlist_head * key_head = (key != IPC_PRIVATE) ?
  47. &sem_key_hlist[SEM_HASH(key)] : NULL;
  48. struct hlist_head * sid_head = semid ?
  49. &sem_sid_hlist[SEM_HASH(semid)] : NULL;
  50. struct shim_sem_handle * tmp;
  51. struct hlist_node * pos;
  52. int ret = 0;
  53. if (key_head)
  54. hlist_for_each_entry(tmp, pos, key_head, key_hlist)
  55. if (tmp->semkey == key) {
  56. if (tmp->semid == semid)
  57. goto out;
  58. return -EEXIST;
  59. }
  60. if (sid_head)
  61. hlist_for_each_entry(tmp, pos, sid_head, sid_hlist)
  62. if (tmp->semid == semid) {
  63. if (key)
  64. tmp->semkey = key;
  65. goto out;
  66. }
  67. struct shim_handle * hdl = get_new_handle();
  68. if (!hdl)
  69. return -ENOMEM;
  70. tmp = &hdl->info.sem;
  71. hdl->type = TYPE_SEM;
  72. tmp->semkey = key;
  73. tmp->semid = semid;
  74. tmp->owned = owned;
  75. tmp->event = DkNotificationEventCreate(PAL_FALSE);
  76. if (owned && nsems) {
  77. tmp->nsems = nsems;
  78. tmp->sems = malloc(sizeof(struct sem_obj) * nsems);
  79. if (!tmp->sems) {
  80. ret = -ENOMEM;
  81. goto failed;
  82. }
  83. for (int i = 0 ; i < nsems ; i++) {
  84. tmp->sems[i].num = i;
  85. tmp->sems[i].val = 0;
  86. tmp->sems[i].host_sem_id = 0;
  87. tmp->sems[i].host_sem = NULL;
  88. INIT_LIST_HEAD(&tmp->sems[i].ops);
  89. INIT_LIST_HEAD(&tmp->sems[i].next_ops);
  90. }
  91. }
  92. INIT_LIST_HEAD(&tmp->migrated);
  93. INIT_LIST_HEAD(&tmp->list);
  94. get_handle(hdl);
  95. list_add_tail(&tmp->list, &sem_list);
  96. INIT_HLIST_NODE(&tmp->key_hlist);
  97. if (key_head) {
  98. get_handle(hdl);
  99. hlist_add_head(&tmp->key_hlist, key_head);
  100. }
  101. if (sid_head) {
  102. get_handle(hdl);
  103. hlist_add_head(&tmp->sid_hlist, sid_head);
  104. }
  105. out:
  106. if (!semhdl) {
  107. put_handle(hdl);
  108. return 0;
  109. }
  110. *semhdl = tmp;
  111. return 0;
  112. failed:
  113. put_handle(hdl);
  114. return ret;
  115. }
  116. int add_sem_handle (unsigned long key, IDTYPE id, int nsems, bool owned)
  117. {
  118. lock(sem_list_lock);
  119. int ret = __add_sem_handle(key, id, nsems, owned, NULL);
  120. unlock(sem_list_lock);
  121. return ret;
  122. }
  123. struct shim_sem_handle * get_sem_handle_by_key (unsigned long key)
  124. {
  125. struct hlist_head * key_head = &sem_key_hlist[SEM_HASH(key)];
  126. struct shim_sem_handle * tmp, * found = NULL;
  127. struct hlist_node * pos;
  128. lock(sem_list_lock);
  129. hlist_for_each_entry(tmp, pos, key_head, key_hlist)
  130. if (tmp->semkey == key) {
  131. found = tmp;
  132. break;
  133. }
  134. if (found)
  135. get_handle(SEM_TO_HANDLE(found));
  136. unlock(sem_list_lock);
  137. return found;
  138. }
  139. struct shim_sem_handle * get_sem_handle_by_id (IDTYPE semid)
  140. {
  141. struct hlist_head * sid_head = &sem_sid_hlist[SEM_HASH(semid)];
  142. struct shim_sem_handle * tmp, * found = NULL;
  143. struct hlist_node * pos;
  144. lock(sem_list_lock);
  145. hlist_for_each_entry(tmp, pos, sid_head, sid_hlist)
  146. if (tmp->semid == semid) {
  147. found = tmp;
  148. break;
  149. }
  150. if (found)
  151. get_handle(SEM_TO_HANDLE(found));
  152. unlock(sem_list_lock);
  153. return found;
  154. }
  /* Drop one reference on the shim_handle that embeds this semaphore handle. */
  void put_sem_handle (struct shim_sem_handle * sem)
  {
      struct shim_handle * container = SEM_TO_HANDLE(sem);

      put_handle(container);
  }
  159. static int __del_sem_handle (struct shim_sem_handle * sem)
  160. {
  161. if (sem->deleted)
  162. return 0;
  163. sem->deleted = true;
  164. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  165. lock(sem_list_lock);
  166. list_del_init(&sem->list);
  167. put_handle(hdl);
  168. if (!hlist_unhashed(&sem->key_hlist)) {
  169. hlist_del_init(&sem->key_hlist);
  170. put_handle(hdl);
  171. }
  172. if (!hlist_unhashed(&sem->sid_hlist)) {
  173. hlist_del_init(&sem->sid_hlist);
  174. put_handle(hdl);
  175. }
  176. unlock(sem_list_lock);
  177. return 0;
  178. }
  179. int del_sem_handle (struct shim_sem_handle * sem)
  180. {
  181. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  182. lock(hdl->lock);
  183. int ret = del_sem_handle(sem);
  184. unlock(hdl->lock);
  185. return ret;
  186. }
  187. static void __try_create_lock (void)
  188. {
  189. create_lock_runtime(&sem_list_lock);
  190. }
  191. int shim_do_semget (key_t key, int nsems, int semflg)
  192. {
  193. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  194. IDTYPE semid = 0;
  195. int ret;
  196. __try_create_lock();
  197. if (key != IPC_PRIVATE) {
  198. struct shim_sem_handle * sem = get_sem_handle_by_key(key);
  199. if (sem) {
  200. semid = sem->semid;
  201. put_sem_handle(sem);
  202. return (semflg & IPC_EXCL) ? -EEXIST : semid;
  203. }
  204. }
  205. struct sysv_key k;
  206. k.key = key;
  207. k.type = SYSV_SEM;
  208. if (semflg & IPC_CREAT) {
  209. do {
  210. semid = allocate_sysv(0, 0);
  211. if (!semid)
  212. semid = ipc_sysv_lease_send(NULL);
  213. } while (!semid);
  214. if (key != IPC_PRIVATE) {
  215. if ((ret = ipc_sysv_tellkey_send(NULL, 0, &k, semid, 0)) < 0) {
  216. release_sysv(semid);
  217. return ret;
  218. }
  219. }
  220. add_sem_handle(key, semid, nsems, true);
  221. } else {
  222. if ((ret = ipc_sysv_findkey_send(&k)) < 0)
  223. return ret;
  224. semid = ret;
  225. if ((ret = ipc_sysv_query_send(semid)) < 0)
  226. return ret;
  227. }
  228. return semid;
  229. }
  230. static int connect_sem_handle (int semid, int nsems,
  231. struct shim_sem_handle ** semp)
  232. {
  233. struct shim_sem_handle * sem = get_sem_handle_by_id(semid);
  234. int ret;
  235. if (!sem) {
  236. if ((ret = ipc_sysv_query_send(semid)) < 0)
  237. return ret;
  238. if (!sem) {
  239. lock(sem_list_lock);
  240. ret = __add_sem_handle(IPC_PRIVATE, semid, nsems, false, &sem);
  241. unlock(sem_list_lock);
  242. if (ret < 0)
  243. return ret;
  244. }
  245. }
  246. *semp = sem;
  247. return 0;
  248. }
  249. int recover_sem_ownership (struct shim_sem_handle * sem,
  250. struct sem_backup * backups, int nbackups,
  251. struct sem_client_backup * clients, int nclients)
  252. {
  253. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  254. lock(hdl->lock);
  255. assert(!sem->owned);
  256. assert(!sem->nsems && !sem->sems);
  257. sem->nsems = nbackups;
  258. if (!sem->sems && !(sem->sems = malloc(sizeof(struct sem_obj) * nbackups)))
  259. goto out;
  260. for (int i = 0 ; i < nbackups ; i++) {
  261. sem->sems[i].num = i;
  262. sem->sems[i].val = backups[i].val;
  263. sem->sems[i].zcnt = backups[i].zcnt;
  264. sem->sems[i].ncnt = backups[i].ncnt;
  265. sem->sems[i].pid = backups[i].pid;
  266. INIT_LIST_HEAD(&sem->sems[i].ops);
  267. INIT_LIST_HEAD(&sem->sems[i].next_ops);
  268. }
  269. for (int i = 0 ; i < nclients ; i++) {
  270. struct sem_ops * op = malloc(sizeof(struct sem_ops));
  271. if (!op)
  272. continue;
  273. op->stat.completed = false;
  274. op->stat.failed = false;
  275. op->stat.nops = clients[i].nops;
  276. op->stat.current = clients[i].current;
  277. op->stat.timeout = -1;
  278. op->client.vmid = clients[i].vmid;
  279. op->client.port = NULL;
  280. op->client.seq = clients[i].seq;
  281. INIT_LIST_HEAD(&op->progress);
  282. list_add_tail(&op->progress, &sem->migrated);
  283. }
  284. sem->owned = true;
  285. DkEventSet(sem->event);
  286. out:
  287. unlock(hdl->lock);
  288. return 0;
  289. }
  290. static int __do_semop (int semid, struct sembuf * sops, unsigned int nsops,
  291. unsigned long timeout)
  292. {
  293. int ret;
  294. struct shim_sem_handle * sem;
  295. int nsems = 0;
  296. for (int i = 0 ; i < nsops ; i++)
  297. if (sops[i].sem_num >= nsems)
  298. nsems = sops[i].sem_num + 1;
  299. __try_create_lock();
  300. if ((ret = connect_sem_handle(semid, nsems, &sem)) < 0)
  301. return ret;
  302. ret = submit_sysv_sem(sem, sops, nsops, timeout, NULL);
  303. put_sem_handle(sem);
  304. return ret;
  305. }
  306. int shim_do_semop (int semid, struct sembuf * sops, unsigned int nsops)
  307. {
  308. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  309. return __do_semop(semid, sops, nsops, IPC_SEM_NOTIMEOUT);
  310. }
  311. int shim_do_semtimedop (int semid, struct sembuf * sops, unsigned int nsops,
  312. const struct timespec * timeout)
  313. {
  314. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  315. return __do_semop(semid, sops, nsops,
  316. timeout->tv_sec * 1000000000ULL + timeout->tv_nsec);
  317. }
  318. int shim_do_semctl (int semid, int semnum, int cmd, unsigned long arg)
  319. {
  320. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  321. struct shim_sem_handle * sem;
  322. int ret;
  323. __try_create_lock();
  324. if ((ret = connect_sem_handle(semid, 0, &sem)) < 0)
  325. return ret;
  326. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  327. lock(hdl->lock);
  328. switch (cmd) {
  329. case IPC_RMID: {
  330. if (!sem->owned) {
  331. ret = ipc_sysv_delres_send(NULL, 0, semid, SYSV_SEM);
  332. if (ret < 0)
  333. goto out;
  334. }
  335. __del_sem_handle(sem);
  336. goto out;
  337. }
  338. }
  339. if (sem->owned) {
  340. if (sem->deleted) {
  341. ret = -EIDRM;
  342. goto out;
  343. }
  344. switch (cmd) {
  345. case GETALL:
  346. for (int i = 0 ; i < sem->nsems ; i++) {
  347. unsigned short val = sem->sems[i].val;
  348. ((unsigned short *) arg)[i] = val;
  349. }
  350. break;
  351. case GETNCNT:
  352. ret = sem->sems[semnum].ncnt;
  353. break;
  354. case GETPID:
  355. ret = sem->sems[semnum].pid;
  356. break;
  357. case GETVAL:
  358. ret = sem->sems[semnum].val;
  359. break;
  360. case GETZCNT:
  361. ret = sem->sems[semnum].zcnt;
  362. break;
  363. case SETALL:
  364. for (int i = 0 ; i < sem->nsems ; i++) {
  365. unsigned short val = ((unsigned short *) arg)[i];
  366. sem->sems[i].val = val;
  367. }
  368. break;
  369. case SETVAL: {
  370. unsigned short val = arg;
  371. sem->sems[semnum].val = val;
  372. break;
  373. }
  374. }
  375. } else {
  376. switch (cmd) {
  377. case GETALL:
  378. case SETALL: {
  379. int valsize = sem->nsems * sizeof(unsigned short);
  380. ret = ipc_sysv_semctl_send(sem->semid, 0, cmd,
  381. (unsigned short *) arg, valsize);
  382. break;
  383. }
  384. case GETVAL:
  385. case GETNCNT:
  386. case GETPID:
  387. case GETZCNT: {
  388. int valsize = sizeof(unsigned short);
  389. unsigned short val;
  390. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd,
  391. &val, valsize);
  392. if (!ret)
  393. ret = val;
  394. break;
  395. }
  396. case SETVAL: {
  397. unsigned short val = arg;
  398. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd,
  399. &val, sizeof(unsigned short));
  400. break;
  401. }
  402. }
  403. }
  404. out:
  405. unlock(hdl->lock);
  406. put_sem_handle(sem);
  407. return ret;
  408. }
  /*
   * Make one pass over every semaphore of the set and advance the operation
   * sequences queued on it as far as the current semaphore values allow.
   *
   * Finished or failed sequences are unlinked and sem->nreqs is decremented.
   * Sequences with a client vmid get an IPC response (0 on completion,
   * -EAGAIN on failure) and are freed; sequences without one cause
   * sem->event to be set once the pass is over.
   *
   * Returns true if any operation advanced or failed during this pass.
   * __handle_one_sysv_sem() calls this repeatedly until it returns false.
   */
  409. static bool __handle_sysv_sems (struct shim_sem_handle * sem)
  410. {
  411. bool progressed = false;
  412. bool setevent = false;
  413. struct sem_obj * sobj;
  /* Move sequences parked on next_ops onto the active list of each semaphore. */
  414. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++)
  415. list_splice_tail_init(&sobj->next_ops, &sobj->ops);
  416. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  417. struct sem_ops * sops, * n;
  418. list_for_each_entry_safe(sops, n, &sobj->ops, progress) {
  /* op is the next not-yet-applied operation of this sequence. */
  419. struct sembuf * op = &sops->ops[sops->stat.current];
  420. assert(op->sem_num == sobj->num);
  421. assert(sops != n);
  /* Already complete: only the result is left to deliver. */
  422. if (sops->stat.completed)
  423. goto send_result;
  424. again:
  /* sem_op > 0: increment, always succeeds. */
  425. if (op->sem_op > 0) {
  426. sobj->val += op->sem_op;
  427. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op,
  428. sobj->val);
  /* sem_op < 0: decrement; needs val >= |sem_op|. */
  429. } else if (op->sem_op < 0) {
  430. if (sobj->val < -op->sem_op) {
  431. if (op->sem_flg & IPC_NOWAIT) {
  432. debug("sem %u: wait for %u failed\n", sobj->num,
  433. -op->sem_op);
  434. goto failed;
  435. }
  /* Cannot proceed yet: leave the sequence queued on this semaphore. */
  436. continue;
  437. }
  438. sobj->val -= -op->sem_op;
  439. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op,
  440. sobj->val);
  /* sem_op == 0: wait until the value is zero. */
  441. } else {
  442. if (sobj->val) {
  443. if (op->sem_flg & IPC_NOWAIT) {
  444. debug("sem %u: wait for 0 failed\n", sobj->num);
  445. goto failed;
  446. }
  /* Cannot proceed yet: leave the sequence queued on this semaphore. */
  447. continue;
  448. }
  449. debug("sem %u: wait for 0\n", sobj->num);
  450. }
  /* The operation was applied: step to the next one in the sequence. */
  451. progressed = true;
  452. sops->stat.current++;
  453. if (sops->stat.current == sops->stat.nops) {
  454. sops->stat.completed = true;
  455. goto send_result;
  456. }
  457. op = &sops->ops[sops->stat.current];
  /* The next operation targets another semaphore: park the sequence on that
   * semaphore's next_ops; it is spliced into ops at the start of a later pass. */
  458. if (op->sem_num != sobj->num) {
  459. list_move_tail(&sops->progress,
  460. &sem->sems[op->sem_num].next_ops);
  461. continue;
  462. }
  /* Same semaphore: try the next operation right away. */
  463. goto again;
  464. failed:
  465. progressed = true;
  466. sops->stat.failed = true;
  467. send_result:
  /* The sequence is done (completed or failed): unlink it and report. */
  468. list_del_init(&sops->progress);
  469. sem->nreqs--;
  /* No client vmid recorded: signal sem->event after the pass instead of
   * sending an IPC response; the entry is not freed here. */
  470. if (!sops->client.vmid) {
  471. setevent = true;
  472. continue;
  473. }
  /* Client request: reply 0 on completion, -EAGAIN on failure, then release
   * the port reference and the queued entry. */
  474. send_ipc_message(create_ipc_resp_msg_on_stack(
  475. sops->stat.completed ? 0 : -EAGAIN,
  476. sops->client.vmid,
  477. sops->client.seq), sops->client.port);
  478. put_ipc_port(sops->client.port);
  479. sops->client.vmid = 0;
  480. sops->client.port = NULL;
  481. sops->client.seq = 0;
  482. free(sops);
  483. }
  484. }
  485. if (setevent)
  486. DkEventSet(sem->event);
  487. return progressed;
  488. }
  /*
   * Apply the operation sequence `sops` to the set directly, starting at
   * stat->current.
   *
   * sem  - semaphore set to operate on.
   * stat - progress record: `current` is advanced per applied operation;
   *        `completed` is set when all stat->nops operations were applied;
   *        `failed` is set when an IPC_NOWAIT operation cannot proceed.
   * sops - the operations (stat->nops entries).
   *
   * When an operation without IPC_NOWAIT cannot proceed, the function stops
   * with neither flag set.  If any operation was applied before that point,
   * queued sequences are re-run via __handle_sysv_sems() until none makes
   * progress, and the remaining operations are then retried.
   */
  489. static void __handle_one_sysv_sem (struct shim_sem_handle * sem,
  490. struct sem_stat * stat,
  491. struct sembuf * sops)
  492. {
  493. bool progressed = false;
  494. again:
  495. while (stat->current < stat->nops) {
  496. struct sem_obj * sobj = &sem->sems[sops[stat->current].sem_num];
  497. struct sembuf * op = &sops[stat->current];
  /* sem_op > 0: increment, always succeeds. */
  498. if (op->sem_op > 0) {
  499. progressed = true;
  500. sobj->val += op->sem_op;
  501. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op,
  502. sobj->val);
  /* sem_op < 0: decrement; needs val >= |sem_op|. */
  503. } else if (op->sem_op < 0) {
  504. if (sobj->val < -op->sem_op) {
  505. if (op->sem_flg & IPC_NOWAIT) {
  506. stat->failed = true;
  507. debug("sem %u: wait for %u failed\n", sobj->num,
  508. -op->sem_op);
  /* Note: this early return skips the queue re-run under "failed:". */
  509. return;
  510. }
  511. goto failed;
  512. }
  513. progressed = true;
  514. sobj->val -= -op->sem_op;
  515. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op,
  516. sobj->val);
  /* sem_op == 0: proceeds only when the value is zero. */
  517. } else {
  518. if (sobj->val) {
  519. if (op->sem_flg & IPC_NOWAIT) {
  520. stat->failed = true;
  521. debug("sem %u: wait for 0 failed\n", sobj->num);
  522. return;
  523. }
  524. goto failed;
  525. }
  526. progressed = true;
  527. debug("sem %u: wait for 0\n", sobj->num);
  528. }
  529. stat->current++;
  530. }
  531. stat->completed = true;
  /* Reached both after completing and when an operation would block. */
  532. failed:
  533. if (progressed) {
  /* Values changed: let queued sequences run until no more progress is made,
   * then retry the blocked operation, if any. */
  534. while (__handle_sysv_sems(sem));
  535. progressed = false;
  536. if (!stat->completed)
  537. goto again;
  538. }
  539. }
  /* Forward declaration: referenced by sem_policy, defined after
   * submit_sysv_sem(). */
  540. static int sem_balance_migrate (struct shim_handle * hdl,
  541. struct sysv_client * client);
  /* Balancing policy passed to __balance_sysv_score() by submit_sysv_sem();
   * .migrate is the callback that moves ownership of the set. */
  542. static struct sysv_balance_policy sem_policy = {
  543. .score_decay = SEM_SCORE_DECAY,
  544. .score_max = SEM_SCORE_MAX,
  545. .balance_threshold = SEM_BALANCE_THRESHOLD,
  546. .migrate = &sem_balance_migrate,
  547. };
  /* Profiling category and intervals recorded by the SAVE_PROFILE_INTERVAL()
   * calls inside submit_sysv_sem(). */
  548. DEFINE_PROFILE_CATAGORY(submit_sysv_sem, sysv_sem);
  549. DEFINE_PROFILE_INTERVAL(sem_prepare_stat, submit_sysv_sem);
  550. DEFINE_PROFILE_INTERVAL(sem_lock_handle, submit_sysv_sem);
  551. DEFINE_PROFILE_INTERVAL(sem_count_score, submit_sysv_sem);
  552. DEFINE_PROFILE_INTERVAL(sem_handle_by_shared_semaphore, submit_sysv_sem);
  553. DEFINE_PROFILE_INTERVAL(sem_send_ipc_movres, submit_sysv_sem);
  554. DEFINE_PROFILE_INTERVAL(sem_send_ipc_semop, submit_sysv_sem);
  555. DEFINE_PROFILE_INTERVAL(sem_handle_one_sysv_sem, submit_sysv_sem);
  556. DEFINE_PROFILE_INTERVAL(sem_send_ipc_response, submit_sysv_sem);
  557. DEFINE_PROFILE_INTERVAL(sem_alloc_semop, submit_sysv_sem);
  558. DEFINE_PROFILE_INTERVAL(sem_append_semop, submit_sysv_sem);
  559. DEFINE_PROFILE_INTERVAL(sem_wait_for_complete, submit_sysv_sem);
  560. int submit_sysv_sem (struct shim_sem_handle * sem, struct sembuf * sops,
  561. int nsops, unsigned long timeout,
  562. struct sysv_client * client)
  563. {
  564. BEGIN_PROFILE_INTERVAL();
  565. int ret = 0;
  566. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  567. struct sem_ops * sem_ops = NULL;
  568. bool malloced = false;
  569. struct sem_stat stat;
  570. stat.nops = nsops;
  571. stat.current = 0;
  572. stat.timeout = timeout;
  573. stat.completed = false;
  574. stat.failed = false;
  575. SAVE_PROFILE_INTERVAL(sem_prepare_stat);
  576. lock(hdl->lock);
  577. SAVE_PROFILE_INTERVAL(sem_lock_handle);
  578. if (sem->deleted) {
  579. ret = -EIDRM;
  580. goto out_locked;
  581. }
  582. IDTYPE semid = sem->semid;
  583. bool sendreply = false;
  584. unsigned long seq = client ? client->seq : 0;
  585. int score = 0;
  586. for (int i = 0 ; i < nsops ; i++) {
  587. struct sembuf * op = &sops[i];
  588. if (op->sem_num > 0) {
  589. score += SEM_POSITIVE_SCORE(op->sem_num);
  590. } else if (op->sem_num < 0) {
  591. score += SEM_NEGATIVE_SCORE(-op->sem_num);
  592. sendreply = true;
  593. } else {
  594. score += SEM_ZERO_SCORE;
  595. sendreply = true;
  596. }
  597. }
  598. SAVE_PROFILE_INTERVAL(sem_count_score);
  599. if (sem->deleted) {
  600. if (!client || sendreply) {
  601. ret = -EIDRM;
  602. goto out_locked;
  603. }
  604. ret = ipc_sysv_delres_send(client->port, client->vmid, sem->semid,
  605. SYSV_SEM);
  606. goto out_locked;
  607. }
  608. if (sem->owned) {
  609. __balance_sysv_score(&sem_policy, hdl, sem->scores, MAX_SYSV_CLIENTS,
  610. client, score);
  611. if (!sem->owned && client) {
  612. struct shim_ipc_info * owner = sem->owner;
  613. assert(owner);
  614. ret = ipc_sysv_movres_send(client, owner->vmid,
  615. qstrgetstr(&owner->uri), sem->lease,
  616. sem->semid, SYSV_SEM);
  617. goto out_locked;
  618. }
  619. }
  620. if (!sem->owned) {
  621. if (client) {
  622. struct shim_ipc_info * owner = sem->owner;
  623. ret = owner ?
  624. ipc_sysv_movres_send(client, owner->vmid,
  625. qstrgetstr(&owner->uri), sem->lease,
  626. sem->semid, SYSV_SEM) :
  627. -ECONNREFUSED;
  628. SAVE_PROFILE_INTERVAL(sem_send_ipc_movres);
  629. goto out_locked;
  630. }
  631. unowned:
  632. unlock(hdl->lock);
  633. ret = ipc_sysv_semop_send(semid, sops, nsops, timeout, &seq);
  634. if (ret != -EAGAIN &&
  635. ret != -ECONNREFUSED)
  636. goto out;
  637. lock(hdl->lock);
  638. SAVE_PROFILE_INTERVAL(sem_send_ipc_semop);
  639. if (!sem->owned)
  640. goto out_locked;
  641. }
  642. if (seq) {
  643. struct sem_ops * op;
  644. list_for_each_entry(op, &sem->migrated, progress)
  645. if (op->client.vmid == (client ? client->vmid : cur_process.vmid)
  646. && seq == op->client.seq) {
  647. list_del_init(&op->progress);
  648. sem_ops = op;
  649. stat = sem_ops->stat;
  650. malloced = true;
  651. break;
  652. }
  653. }
  654. __handle_one_sysv_sem(sem, &stat, sops);
  655. SAVE_PROFILE_INTERVAL(sem_handle_one_sysv_sem);
  656. if (stat.completed || stat.failed) {
  657. ret = stat.completed ? 0 : -EAGAIN;
  658. if (client && sendreply)
  659. ret = send_ipc_message(create_ipc_resp_msg_on_stack(
  660. ret, client->vmid,
  661. client->seq), client->port);
  662. SAVE_PROFILE_INTERVAL(sem_send_ipc_response);
  663. goto out_locked;
  664. }
  665. if (client) {
  666. assert(sendreply);
  667. if (!sem_ops || !malloced) {
  668. sem_ops = malloc(sizeof(struct sem_ops) +
  669. sizeof(struct sembuf) * nsops);
  670. if (!sem_ops) {
  671. ret = -ENOMEM;
  672. goto out_locked;
  673. }
  674. sem_ops->client.vmid = 0;
  675. sem_ops->client.port = NULL;
  676. sem_ops->client.seq = 0;
  677. INIT_LIST_HEAD(&sem_ops->progress);
  678. malloced = true;
  679. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  680. }
  681. } else {
  682. if (!sem_ops) {
  683. sem_ops = __alloca(sizeof(struct sem_ops) +
  684. sizeof(struct sembuf) * nsops);
  685. sem_ops->client.vmid = 0;
  686. sem_ops->client.port = NULL;
  687. sem_ops->client.seq = 0;
  688. INIT_LIST_HEAD(&sem_ops->progress);
  689. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  690. }
  691. }
  692. sem_ops->stat = stat;
  693. for (int i = 0 ; i < nsops ; i++)
  694. sem_ops->ops[i] = sops[i];
  695. struct list_head * next_ops =
  696. &sem->sems[sops[stat.current].sem_num].next_ops;
  697. assert(list_empty(&sem_ops->progress));
  698. list_add_tail(&sem_ops->progress, next_ops);
  699. //check_list_head(next_ops);
  700. sem->nreqs++;
  701. SAVE_PROFILE_INTERVAL(sem_append_semop);
  702. if (client) {
  703. assert(sendreply);
  704. add_ipc_port(client->port, client->vmid, IPC_PORT_SYSVCON, NULL);
  705. get_ipc_port(client->port);
  706. sem_ops->client = *client;
  707. sem_ops = NULL;
  708. goto out_locked;
  709. }
  710. while (!sem_ops->stat.completed &&
  711. !sem_ops->stat.failed) {
  712. if (!sem->owned) {
  713. list_del_init(&sem_ops->progress);
  714. goto unowned;
  715. }
  716. unlock(hdl->lock);
  717. DkObjectsWaitAny(1, &sem->event, NO_TIMEOUT);
  718. lock(hdl->lock);
  719. SAVE_PROFILE_INTERVAL(sem_wait_for_complete);
  720. }
  721. ret = sem_ops->stat.completed ? 0 : -EAGAIN;
  722. out_locked:
  723. unlock(hdl->lock);
  724. out:
  725. if (sem_ops && malloced)
  726. free(sem_ops);
  727. return ret;
  728. }
  729. static int sem_balance_migrate (struct shim_handle * hdl,
  730. struct sysv_client * src)
  731. {
  732. struct shim_sem_handle * sem = &hdl->info.sem;
  733. int ret = 0;
  734. debug("trigger semaphore balancing, migrate to process %u\n", src->vmid);
  735. struct sem_backup * sem_backups = __alloca(sizeof(struct sem_backup) *
  736. sem->nsems);
  737. struct sem_client_backup * clients =
  738. __alloca(sizeof(struct sem_client_backup) * sem->nreqs);
  739. int sem_cnt = 0, client_cnt = 0;
  740. struct sem_obj * sobj;
  741. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  742. assert(sem_cnt < sem->nsems);
  743. struct sem_backup * b = sem_backups + (sem_cnt++);
  744. b->val = sobj->val;
  745. b->zcnt = sobj->zcnt;
  746. b->ncnt = sobj->ncnt;
  747. b->pid = sobj->pid;
  748. list_splice_tail(&sobj->next_ops, &sobj->ops);
  749. struct sem_ops * sops;
  750. list_for_each_entry(sops, &sobj->ops, progress) {
  751. assert(client_cnt < sem->nreqs);
  752. struct sem_client_backup * c = clients + (client_cnt)++;
  753. c->vmid = sops->client.vmid;
  754. c->seq = sops->client.seq;
  755. c->current = sops->stat.current;
  756. c->nops = sops->stat.nops;
  757. }
  758. }
  759. struct shim_ipc_info * info = discover_client(src->port, src->vmid);
  760. if (!info)
  761. goto out;
  762. ipc_sysv_sublease_send(src->vmid, sem->semid,
  763. qstrgetstr(&info->uri),
  764. &sem->lease);
  765. ret = ipc_sysv_semmov_send(src->port, src->vmid, sem->semid, sem->lease,
  766. sem_backups, sem_cnt, clients, client_cnt,
  767. sem->scores, MAX_SYSV_CLIENTS);
  768. if (ret < 0)
  769. goto failed_info;
  770. sem->owned = false;
  771. sem->owner = info;
  772. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  773. struct sem_ops * sops, * n;
  774. list_for_each_entry_safe(sops, n, &sobj->ops, progress) {
  775. list_del_init(&sops->progress);
  776. sem->nreqs--;
  777. sops->stat.failed = true;
  778. if (!sops->client.vmid)
  779. continue;
  780. ipc_sysv_movres_send(&sops->client, src->vmid,
  781. qstrgetstr(&info->uri), sem->lease,
  782. sem->semid, SYSV_SEM);
  783. put_ipc_port(sops->client.port);
  784. free(sops);
  785. }
  786. }
  787. sem->nsems = 0;
  788. free(sem->sems);
  789. sem->sems = NULL;
  790. ret = 0;
  791. DkEventSet(sem->event);
  792. goto out;
  793. failed_info:
  794. put_ipc_info(info);
  795. out:
  796. return ret;
  797. }