shim_semget.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_semget.c
  15. *
  16. * Implementation of system call "semget", "semop", "semtimedop" and "semctl".
  17. */
  18. #include <shim_internal.h>
  19. #include <shim_table.h>
  20. #include <shim_utils.h>
  21. #include <shim_handle.h>
  22. #include <shim_ipc.h>
  23. #include <shim_sysv.h>
  24. #include <shim_profile.h>
  25. #include <pal.h>
  26. #include <pal_error.h>
  27. #include <list.h>
  28. #include <errno.h>
  /* Geometry of the semaphore lookup tables: SEM_HASH_NUM (2^SEM_HASH_LEN)
   * buckets, selected by the low SEM_HASH_LEN bits of the value being hashed. */
  29. #define SEM_HASH_LEN 8
  30. #define SEM_HASH_NUM (1 << SEM_HASH_LEN)
  31. #define SEM_HASH_MASK (SEM_HASH_NUM - 1)
  32. #define SEM_HASH(idx) ((idx) & SEM_HASH_MASK)
  33. /* The sem_list links shim_sem_handle objects by the list field.
  34. * The sem_key_hlist links them by key_hlist, and sem_sid_hlist by sid_hlist. */
  35. DEFINE_LISTP(shim_sem_handle);
  /* List of every registered semaphore handle. */
  36. static LISTP_TYPE(shim_sem_handle) sem_list;
  /* Hash buckets indexed by SEM_HASH(key); entries are linked via key_hlist. */
  37. static LISTP_TYPE(shim_sem_handle) sem_key_hlist [SEM_HASH_NUM];
  /* Hash buckets indexed by SEM_HASH(semid); entries are linked via sid_hlist. */
  38. static LISTP_TYPE(shim_sem_handle) sem_sid_hlist [SEM_HASH_NUM];
  /* Taken around every traversal or modification of the three lists above. */
  39. static struct shim_lock sem_list_lock;
  40. DEFINE_PROFILE_CATEGORY(sysv_sem, );
  /* Map a shim_sem_handle back to the shim_handle that embeds it as info.sem. */
  41. #define SEM_TO_HANDLE(semhdl) \
  42. container_of((semhdl), struct shim_handle, info.sem)
  43. static int __add_sem_handle (unsigned long key, IDTYPE semid,
  44. int nsems, bool owned,
  45. struct shim_sem_handle ** semhdl)
  46. {
  47. LISTP_TYPE(shim_sem_handle) * key_head = (key != IPC_PRIVATE) ?
  48. &sem_key_hlist[SEM_HASH(key)] : NULL;
  49. LISTP_TYPE(shim_sem_handle) * sid_head = semid ?
  50. &sem_sid_hlist[SEM_HASH(semid)] : NULL;
  51. struct shim_sem_handle * tmp;
  52. int ret = 0;
  53. if (key_head)
  54. LISTP_FOR_EACH_ENTRY(tmp, key_head, key_hlist)
  55. if (tmp->semkey == key) {
  56. if (tmp->semid == semid)
  57. goto out;
  58. return -EEXIST;
  59. }
  60. if (sid_head)
  61. LISTP_FOR_EACH_ENTRY(tmp, sid_head, sid_hlist)
  62. if (tmp->semid == semid) {
  63. if (key)
  64. tmp->semkey = key;
  65. goto out;
  66. }
  67. struct shim_handle * hdl = get_new_handle();
  68. if (!hdl)
  69. return -ENOMEM;
  70. tmp = &hdl->info.sem;
  71. hdl->type = TYPE_SEM;
  72. tmp->semkey = key;
  73. tmp->semid = semid;
  74. tmp->owned = owned;
  75. tmp->event = DkNotificationEventCreate(PAL_FALSE);
  76. if (owned && nsems) {
  77. tmp->nsems = nsems;
  78. tmp->sems = malloc(sizeof(struct sem_obj) * nsems);
  79. if (!tmp->sems) {
  80. ret = -ENOMEM;
  81. goto failed;
  82. }
  83. for (int i = 0 ; i < nsems ; i++) {
  84. tmp->sems[i].num = i;
  85. tmp->sems[i].val = 0;
  86. tmp->sems[i].host_sem_id = 0;
  87. tmp->sems[i].host_sem = NULL;
  88. INIT_LISTP(&tmp->sems[i].ops);
  89. INIT_LISTP(&tmp->sems[i].next_ops);
  90. }
  91. }
  92. INIT_LISTP(&tmp->migrated);
  93. INIT_LIST_HEAD(tmp, list);
  94. get_handle(hdl);
  95. LISTP_ADD_TAIL(tmp, &sem_list, list);
  96. INIT_LIST_HEAD(tmp, key_hlist);
  97. if (key_head) {
  98. get_handle(hdl);
  99. LISTP_ADD(tmp, key_head, key_hlist);
  100. }
  101. if (sid_head) {
  102. get_handle(hdl);
  103. LISTP_ADD(tmp, sid_head, sid_hlist);
  104. }
  105. out:
  106. if (!semhdl) {
  107. put_handle(hdl);
  108. return 0;
  109. }
  110. *semhdl = tmp;
  111. return 0;
  112. failed:
  113. put_handle(hdl);
  114. return ret;
  115. }
  116. int add_sem_handle (unsigned long key, IDTYPE id, int nsems, bool owned)
  117. {
  118. lock(&sem_list_lock);
  119. int ret = __add_sem_handle(key, id, nsems, owned, NULL);
  120. unlock(&sem_list_lock);
  121. return ret;
  122. }
  123. struct shim_sem_handle * get_sem_handle_by_key (unsigned long key)
  124. {
  125. LISTP_TYPE(shim_sem_handle) * key_head = &sem_key_hlist[SEM_HASH(key)];
  126. struct shim_sem_handle * tmp, * found = NULL;
  127. lock(&sem_list_lock);
  128. LISTP_FOR_EACH_ENTRY(tmp, key_head, key_hlist)
  129. if (tmp->semkey == key) {
  130. found = tmp;
  131. break;
  132. }
  133. if (found)
  134. get_handle(SEM_TO_HANDLE(found));
  135. unlock(&sem_list_lock);
  136. return found;
  137. }
  138. struct shim_sem_handle * get_sem_handle_by_id (IDTYPE semid)
  139. {
  140. LISTP_TYPE(shim_sem_handle) * sid_head = &sem_sid_hlist[SEM_HASH(semid)];
  141. struct shim_sem_handle * tmp, * found = NULL;
  142. lock(&sem_list_lock);
  143. LISTP_FOR_EACH_ENTRY(tmp, sid_head, sid_hlist)
  144. if (tmp->semid == semid) {
  145. found = tmp;
  146. break;
  147. }
  148. if (found)
  149. get_handle(SEM_TO_HANDLE(found));
  150. unlock(&sem_list_lock);
  151. return found;
  152. }
  /* Release one reference on the shim_handle that embeds `sem`. */
  void put_sem_handle (struct shim_sem_handle * sem)
  {
      struct shim_handle * owner = SEM_TO_HANDLE(sem);

      put_handle(owner);
  }
  157. static int __del_sem_handle (struct shim_sem_handle * sem)
  158. {
  159. if (sem->deleted)
  160. return 0;
  161. sem->deleted = true;
  162. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  163. lock(&sem_list_lock);
  164. LISTP_DEL_INIT(sem, &sem_list, list);
  165. put_handle(hdl);
  166. if (!LIST_EMPTY(sem, key_hlist)) {
  167. // DEP: Yuck
  168. LISTP_TYPE(shim_sem_handle) * key_head = &sem_key_hlist[SEM_HASH(sem->semkey)];
  169. LISTP_DEL_INIT(sem, key_head, key_hlist);
  170. put_handle(hdl);
  171. }
  172. if (!LIST_EMPTY(sem, sid_hlist)) {
  173. // DEP: Yuck
  174. LISTP_TYPE(shim_sem_handle) * sid_head = &sem_sid_hlist[SEM_HASH(sem->semid)];
  175. LISTP_DEL_INIT(sem, sid_head, sid_hlist);
  176. put_handle(hdl);
  177. }
  178. unlock(&sem_list_lock);
  179. return 0;
  180. }
  181. int del_sem_handle (struct shim_sem_handle * sem)
  182. {
  183. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  184. lock(&hdl->lock);
  185. int ret = del_sem_handle(sem);
  186. unlock(&hdl->lock);
  187. return ret;
  188. }
  /* Ensure sem_list_lock exists before first use; every syscall entry point in
   * this file calls this before touching the semaphore lists. */
  189. static void __try_create_lock (void)
  190. {
  191. create_lock_runtime(&sem_list_lock);
  192. }
  193. int shim_do_semget (key_t key, int nsems, int semflg)
  194. {
  195. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  196. IDTYPE semid = 0;
  197. int ret;
  198. __try_create_lock();
  199. if (key != IPC_PRIVATE) {
  200. struct shim_sem_handle * sem = get_sem_handle_by_key(key);
  201. if (sem) {
  202. semid = sem->semid;
  203. put_sem_handle(sem);
  204. return (semflg & IPC_EXCL) ? -EEXIST : (int) semid;
  205. }
  206. }
  207. struct sysv_key k;
  208. k.key = key;
  209. k.type = SYSV_SEM;
  210. if (semflg & IPC_CREAT) {
  211. do {
  212. semid = allocate_sysv(0, 0);
  213. if (!semid)
  214. semid = ipc_sysv_lease_send(NULL);
  215. } while (!semid);
  216. if (key != IPC_PRIVATE) {
  217. if ((ret = ipc_sysv_tellkey_send(NULL, 0, &k, semid, 0)) < 0) {
  218. release_sysv(semid);
  219. return ret;
  220. }
  221. }
  222. add_sem_handle(key, semid, nsems, true);
  223. } else {
  224. if ((ret = ipc_sysv_findkey_send(&k)) < 0)
  225. return ret;
  226. semid = ret;
  227. if ((ret = ipc_sysv_query_send(semid)) < 0)
  228. return ret;
  229. }
  230. return semid;
  231. }
  232. static int connect_sem_handle (int semid, int nsems,
  233. struct shim_sem_handle ** semp)
  234. {
  235. struct shim_sem_handle * sem = get_sem_handle_by_id(semid);
  236. int ret;
  237. if (!sem) {
  238. if ((ret = ipc_sysv_query_send(semid)) < 0)
  239. return ret;
  240. if (!sem) {
  241. lock(&sem_list_lock);
  242. ret = __add_sem_handle(IPC_PRIVATE, semid, nsems, false, &sem);
  243. unlock(&sem_list_lock);
  244. if (ret < 0)
  245. return ret;
  246. }
  247. }
  248. *semp = sem;
  249. return 0;
  250. }
  251. int recover_sem_ownership (struct shim_sem_handle * sem,
  252. struct sem_backup * backups, int nbackups,
  253. struct sem_client_backup * clients, int nclients)
  254. {
  255. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  256. lock(&hdl->lock);
  257. assert(!sem->owned);
  258. assert(!sem->nsems && !sem->sems);
  259. sem->nsems = nbackups;
  260. if (!sem->sems && !(sem->sems = malloc(sizeof(struct sem_obj) * nbackups)))
  261. goto out;
  262. for (int i = 0 ; i < nbackups ; i++) {
  263. sem->sems[i].num = i;
  264. sem->sems[i].val = backups[i].val;
  265. sem->sems[i].zcnt = backups[i].zcnt;
  266. sem->sems[i].ncnt = backups[i].ncnt;
  267. sem->sems[i].pid = backups[i].pid;
  268. INIT_LISTP(&sem->sems[i].ops);
  269. INIT_LISTP(&sem->sems[i].next_ops);
  270. }
  271. for (int i = 0 ; i < nclients ; i++) {
  272. struct sem_ops * op = malloc(sizeof(struct sem_ops));
  273. if (!op)
  274. continue;
  275. op->stat.completed = false;
  276. op->stat.failed = false;
  277. op->stat.nops = clients[i].nops;
  278. op->stat.current = clients[i].current;
  279. op->stat.timeout = -1;
  280. op->client.vmid = clients[i].vmid;
  281. op->client.port = NULL;
  282. op->client.seq = clients[i].seq;
  283. INIT_LIST_HEAD(op, progress);
  284. LISTP_ADD_TAIL(op, &sem->migrated, progress);
  285. }
  286. sem->owned = true;
  287. DkEventSet(sem->event);
  288. out:
  289. unlock(&hdl->lock);
  290. return 0;
  291. }
  /*
   * Common body of semop/semtimedop: connect to the semaphore set and submit
   * the operations. The semaphore count passed to connect_sem_handle() is one
   * more than the largest sem_num referenced by `sops` (0 when nsops is 0).
   * Returns the result of connect_sem_handle() on failure, otherwise the
   * result of submit_sysv_sem().
   */
  static int __do_semop (int semid, struct sembuf * sops, unsigned int nsops,
                         unsigned long timeout)
  {
      struct shim_sem_handle * handle;
      size_t needed = 0;
      int status;

      for (size_t idx = 0 ; idx < nsops ; idx++) {
          if (sops[idx].sem_num >= needed)
              needed = sops[idx].sem_num + 1;
      }

      __try_create_lock();

      status = connect_sem_handle(semid, needed, &handle);
      if (status < 0)
          return status;

      status = submit_sysv_sem(handle, sops, nsops, timeout, NULL);
      put_sem_handle(handle);

      return status;
  }
  308. int shim_do_semop (int semid, struct sembuf * sops, unsigned int nsops)
  309. {
  310. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  311. return __do_semop(semid, sops, nsops, IPC_SEM_NOTIMEOUT);
  312. }
  313. int shim_do_semtimedop (int semid, struct sembuf * sops, unsigned int nsops,
  314. const struct timespec * timeout)
  315. {
  316. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  317. return __do_semop(semid, sops, nsops,
  318. timeout->tv_sec * 1000000000ULL + timeout->tv_nsec);
  319. }
  320. int shim_do_semctl (int semid, int semnum, int cmd, unsigned long arg)
  321. {
  322. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  323. struct shim_sem_handle * sem;
  324. int ret;
  325. __try_create_lock();
  326. if ((ret = connect_sem_handle(semid, 0, &sem)) < 0)
  327. return ret;
  328. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  329. lock(&hdl->lock);
  330. switch (cmd) {
  331. case IPC_RMID: {
  332. if (!sem->owned) {
  333. ret = ipc_sysv_delres_send(NULL, 0, semid, SYSV_SEM);
  334. if (ret < 0)
  335. goto out;
  336. }
  337. __del_sem_handle(sem);
  338. goto out;
  339. }
  340. }
  341. if (sem->owned) {
  342. if (sem->deleted) {
  343. ret = -EIDRM;
  344. goto out;
  345. }
  346. switch (cmd) {
  347. case GETALL:
  348. for (int i = 0 ; i < sem->nsems ; i++) {
  349. unsigned short val = sem->sems[i].val;
  350. ((unsigned short *) arg)[i] = val;
  351. }
  352. break;
  353. case GETNCNT:
  354. ret = sem->sems[semnum].ncnt;
  355. break;
  356. case GETPID:
  357. ret = sem->sems[semnum].pid;
  358. break;
  359. case GETVAL:
  360. ret = sem->sems[semnum].val;
  361. break;
  362. case GETZCNT:
  363. ret = sem->sems[semnum].zcnt;
  364. break;
  365. case SETALL:
  366. for (int i = 0 ; i < sem->nsems ; i++) {
  367. unsigned short val = ((unsigned short *) arg)[i];
  368. sem->sems[i].val = val;
  369. }
  370. break;
  371. case SETVAL: {
  372. unsigned short val = arg;
  373. sem->sems[semnum].val = val;
  374. break;
  375. }
  376. }
  377. } else {
  378. switch (cmd) {
  379. case GETALL:
  380. case SETALL: {
  381. int valsize = sem->nsems * sizeof(unsigned short);
  382. ret = ipc_sysv_semctl_send(sem->semid, 0, cmd,
  383. (unsigned short *) arg, valsize);
  384. break;
  385. }
  386. case GETVAL:
  387. case GETNCNT:
  388. case GETPID:
  389. case GETZCNT: {
  390. int valsize = sizeof(unsigned short);
  391. unsigned short val;
  392. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd,
  393. &val, valsize);
  394. if (!ret)
  395. ret = val;
  396. break;
  397. }
  398. case SETVAL: {
  399. unsigned short val = arg;
  400. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd,
  401. &val, sizeof(unsigned short));
  402. break;
  403. }
  404. }
  405. }
  406. out:
  407. unlock(&hdl->lock);
  408. put_sem_handle(sem);
  409. return ret;
  410. }
  /*
   * Make one pass over the queued requests of every semaphore in `sem`.
   *
   * Each semaphore's next_ops list is first spliced onto its ops list; every
   * request on an ops list is then advanced through its sembuf array for as
   * long as the operations can be applied. Finished or failed requests are
   * unlinked and either signalled locally (client.vmid == 0) or answered with
   * an IPC_RESP message and freed.
   *
   * Returns true if at least one operation was applied or one request failed
   * during this pass. NOTE(review): callers appear to hold the handle lock.
   */
  411. static bool __handle_sysv_sems (struct shim_sem_handle * sem)
  412. {
  413. bool progressed = false;
  414. bool setevent = false;
  415. struct sem_obj * sobj;
  /* Move everything queued on next_ops to the tail of ops, emptying next_ops. */
  416. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++)
  417. LISTP_SPLICE_TAIL_INIT(&sobj->next_ops, &sobj->ops, progress, sem_ops);
  418. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  419. struct sem_ops * sops, * n;
  420. LISTP_FOR_EACH_ENTRY_SAFE(sops, n, &sobj->ops, progress) {
  421. struct sembuf * op = &sops->ops[sops->stat.current];
  422. assert(op->sem_num == sobj->num);
  423. // first_iter is a variable defined by LISTP_FOR_EACH_ENTRY_SAFE
  424. // The second part of this assertion is only valid after the first attempt
  425. assert(first_iter || (sops != n));
  426. if (sops->stat.completed)
  427. goto send_result;
  428. again:
  /* Positive op: add to the value. Negative op: subtract if the value is
   * large enough. Zero op: proceed only when the value is zero. A request
   * that cannot proceed fails if IPC_NOWAIT is set, otherwise stays queued. */
  429. if (op->sem_op > 0) {
  430. sobj->val += op->sem_op;
  431. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op,
  432. sobj->val);
  433. } else if (op->sem_op < 0) {
  434. if (sobj->val < -op->sem_op) {
  435. if (op->sem_flg & IPC_NOWAIT) {
  436. debug("sem %u: wait for %u failed\n", sobj->num,
  437. -op->sem_op);
  438. goto failed;
  439. }
  440. continue;
  441. }
  442. sobj->val -= -op->sem_op;
  443. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op,
  444. sobj->val);
  445. } else {
  446. if (sobj->val) {
  447. if (op->sem_flg & IPC_NOWAIT) {
  448. debug("sem %u: wait for 0 failed\n", sobj->num);
  449. goto failed;
  450. }
  451. continue;
  452. }
  453. debug("sem %u: wait for 0\n", sobj->num);
  454. }
  /* The operation was applied; move on to the request's next sembuf. */
  455. progressed = true;
  456. sops->stat.current++;
  457. if (sops->stat.current == sops->stat.nops) {
  458. sops->stat.completed = true;
  459. goto send_result;
  460. }
  461. op = &sops->ops[sops->stat.current];
  /* If the next operation targets another semaphore, requeue the request on
   * that semaphore's next_ops list; it is picked up by a later pass. */
  462. if (op->sem_num != sobj->num) {
  463. LISTP_MOVE_TAIL(sops,
  464. &sem->sems[op->sem_num].next_ops,
  465. &sobj->ops,
  466. progress);
  467. continue;
  468. }
  469. goto again;
  470. failed:
  471. progressed = true;
  472. sops->stat.failed = true;
  473. send_result:
  474. /* Chia-Che 10/17/17: If the code reaches this point, sops should
  475. * still be in sobj->ops. */
  476. LISTP_DEL_INIT(sops, &sobj->ops, progress);
  477. sem->nreqs--;
  /* vmid == 0 marks a request without a remote client: it is not freed here,
   * only the event is set once the whole pass is done. */
  478. if (!sops->client.vmid) {
  479. setevent = true;
  480. continue;
  481. }
  /* Remote client: reply with 0 on completion or -EAGAIN on failure, then
   * drop the port reference and free the request. */
  482. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_resp));
  483. struct shim_ipc_msg* resp_msg = __alloca(total_msg_size);
  484. init_ipc_msg(resp_msg, IPC_RESP, total_msg_size, sops->client.vmid);
  485. resp_msg->seq = sops->client.seq;
  486. struct shim_ipc_resp* resp = (struct shim_ipc_resp *) resp_msg->msg;
  487. resp->retval = sops->stat.completed ? 0 : -EAGAIN;
  488. send_ipc_message(resp_msg, sops->client.port);
  489. put_ipc_port(sops->client.port);
  490. sops->client.vmid = 0;
  491. sops->client.port = NULL;
  492. sops->client.seq = 0;
  493. free(sops);
  494. }
  495. }
  496. if (setevent)
  497. DkEventSet(sem->event);
  498. return progressed;
  499. }
  /*
   * Try to apply the operations of a single request directly, starting at
   * stat->current.
   *
   * sem  - semaphore set whose sems[] values are read and modified.
   * stat - progress record of the request; current, completed and failed are
   *        updated here.
   * sops - the request's operation array (stat->nops entries).
   *
   * On return stat->completed is set if every operation was applied, and
   * stat->failed is set if an operation could not proceed and had IPC_NOWAIT.
   * If neither is set the request is blocked at sops[stat->current].
   * NOTE(review): when an IPC_NOWAIT failure occurs, operations applied
   * earlier in this call are not undone here.
   */
  500. static void __handle_one_sysv_sem (struct shim_sem_handle * sem,
  501. struct sem_stat * stat,
  502. struct sembuf * sops)
  503. {
  504. bool progressed = false;
  505. again:
  506. while (stat->current < stat->nops) {
  507. struct sem_obj * sobj = &sem->sems[sops[stat->current].sem_num];
  508. struct sembuf * op = &sops[stat->current];
  509. if (op->sem_op > 0) {
  510. progressed = true;
  511. sobj->val += op->sem_op;
  512. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op,
  513. sobj->val);
  514. } else if (op->sem_op < 0) {
  515. if (sobj->val < -op->sem_op) {
  516. if (op->sem_flg & IPC_NOWAIT) {
  517. stat->failed = true;
  518. debug("sem %u: wait for %u failed\n", sobj->num,
  519. -op->sem_op);
  520. return;
  521. }
  522. goto failed;
  523. }
  524. progressed = true;
  525. sobj->val -= -op->sem_op;
  526. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op,
  527. sobj->val);
  528. } else {
  529. if (sobj->val) {
  530. if (op->sem_flg & IPC_NOWAIT) {
  531. stat->failed = true;
  532. debug("sem %u: wait for 0 failed\n", sobj->num);
  533. return;
  534. }
  535. goto failed;
  536. }
  537. progressed = true;
  538. debug("sem %u: wait for 0\n", sobj->num);
  539. }
  540. stat->current++;
  541. }
  542. stat->completed = true;
  543. failed:
  /* Values changed, so queued requests may now be able to run: keep making
   * passes until one makes no progress, then retry this request if it is
   * still incomplete. */
  544. if (progressed) {
  545. while (__handle_sysv_sems(sem));
  546. progressed = false;
  547. if (!stat->completed)
  548. goto again;
  549. }
  550. }
  /* Balancing policy handed to __balance_sysv_score() by submit_sysv_sem();
   * only compiled when semaphore migration is enabled. */
  551. #if MIGRATE_SYSV_SEM == 1
  552. static int sem_balance_migrate (struct shim_handle * hdl,
  553. struct sysv_client * client);
  554. static struct sysv_balance_policy sem_policy = {
  555. .score_decay = SEM_SCORE_DECAY,
  556. .score_max = SEM_SCORE_MAX,
  557. .balance_threshold = SEM_BALANCE_THRESHOLD,
  558. .migrate = &sem_balance_migrate,
  559. };
  560. #endif
  /* Profiling intervals recorded at the SAVE_PROFILE_INTERVAL() points inside
   * submit_sysv_sem(). */
  561. DEFINE_PROFILE_CATEGORY(submit_sysv_sem, sysv_sem);
  562. DEFINE_PROFILE_INTERVAL(sem_prepare_stat, submit_sysv_sem);
  563. DEFINE_PROFILE_INTERVAL(sem_lock_handle, submit_sysv_sem);
  564. DEFINE_PROFILE_INTERVAL(sem_count_score, submit_sysv_sem);
  565. DEFINE_PROFILE_INTERVAL(sem_handle_by_shared_semaphore, submit_sysv_sem);
  566. DEFINE_PROFILE_INTERVAL(sem_send_ipc_movres, submit_sysv_sem);
  567. DEFINE_PROFILE_INTERVAL(sem_send_ipc_semop, submit_sysv_sem);
  568. DEFINE_PROFILE_INTERVAL(sem_handle_one_sysv_sem, submit_sysv_sem);
  569. DEFINE_PROFILE_INTERVAL(sem_send_ipc_response, submit_sysv_sem);
  570. DEFINE_PROFILE_INTERVAL(sem_alloc_semop, submit_sysv_sem);
  571. DEFINE_PROFILE_INTERVAL(sem_append_semop, submit_sysv_sem);
  572. DEFINE_PROFILE_INTERVAL(sem_wait_for_complete, submit_sysv_sem);
  /*
   * Submit a semop request against `sem`.
   *
   * sem     - target semaphore set.
   * sops    - array of nsops operations.
   * timeout - stored in the request's stat and passed on when forwarding.
   * client  - NULL when the request comes from the local syscall path (as in
   *           __do_semop); otherwise describes the requester the result is
   *           sent or redirected to over IPC.
   *
   * If the set is owned locally the operations are applied directly when
   * possible; otherwise the request is queued and, for a local caller, this
   * function waits on sem->event until the request completes or fails. If the
   * set is not owned, a local request is forwarded with ipc_sysv_semop_send()
   * and a client request is redirected with ipc_sysv_movres_send().
   *
   * Returns 0 on completion, -EAGAIN if the request failed, -EIDRM if the set
   * was deleted, -ENOMEM on allocation failure, or the result of the IPC call.
   */
  573. int submit_sysv_sem (struct shim_sem_handle * sem, struct sembuf * sops,
  574. int nsops, unsigned long timeout,
  575. struct sysv_client * client)
  576. {
  577. BEGIN_PROFILE_INTERVAL();
  578. int ret = 0;
  579. struct shim_handle * hdl = SEM_TO_HANDLE(sem);
  580. struct sem_ops * sem_ops = NULL;
  581. bool malloced = false;
  582. struct sem_stat stat;
  583. stat.nops = nsops;
  584. stat.current = 0;
  585. stat.timeout = timeout;
  586. stat.completed = false;
  587. stat.failed = false;
  588. SAVE_PROFILE_INTERVAL(sem_prepare_stat);
  589. lock(&hdl->lock);
  590. SAVE_PROFILE_INTERVAL(sem_lock_handle);
  591. if (sem->deleted) {
  592. ret = -EIDRM;
  593. goto out_locked;
  594. }
  595. IDTYPE semid = sem->semid;
  596. bool sendreply = false;
  597. unsigned long seq = client ? client->seq : 0;
  598. int score = 0;
  /* Sum a balancing score over the operations. Any negative or zero operation
   * sets sendreply, which later decides whether a client gets an IPC_RESP. */
  599. for (int i = 0 ; i < nsops ; i++) {
  600. struct sembuf * op = &sops[i];
  601. if (op->sem_op > 0) {
  602. score += SEM_POSITIVE_SCORE(op->sem_num);
  603. } else if (op->sem_op < 0) {
  /* NOTE(review): this negates sem_num, not sem_op - confirm which is meant. */
  604. score += SEM_NEGATIVE_SCORE(-op->sem_num);
  605. sendreply = true;
  606. } else {
  607. score += SEM_ZERO_SCORE;
  608. sendreply = true;
  609. }
  610. }
  611. SAVE_PROFILE_INTERVAL(sem_count_score);
  /* NOTE(review): sem->deleted was already tested above under the same lock,
   * so this branch looks unreachable - confirm before relying on it. */
  612. if (sem->deleted) {
  613. if (!client || sendreply) {
  614. ret = -EIDRM;
  615. goto out_locked;
  616. }
  617. ret = ipc_sysv_delres_send(client->port, client->vmid, sem->semid,
  618. SYSV_SEM);
  619. goto out_locked;
  620. }
  621. #if MIGRATE_SYSV_SEM == 1
  /* Balancing may hand the set to another process (clearing sem->owned); a
   * client request is then redirected to the new owner. */
  622. if (sem->owned) {
  623. __balance_sysv_score(&sem_policy, hdl, sem->scores, MAX_SYSV_CLIENTS,
  624. client, score);
  625. if (!sem->owned && client) {
  626. struct shim_ipc_info * owner = sem->owner;
  627. assert(owner);
  628. ret = ipc_sysv_movres_send(client, owner->vmid,
  629. qstrgetstr(&owner->uri), sem->lease,
  630. sem->semid, SYSV_SEM);
  631. goto out_locked;
  632. }
  633. }
  634. #endif
  /* Not the owner: redirect a client to the known owner (or refuse with
   * -ECONNREFUSED when none is recorded); forward a local request over IPC. */
  635. if (!sem->owned) {
  636. if (client) {
  637. struct shim_ipc_info * owner = sem->owner;
  638. ret = owner ?
  639. ipc_sysv_movres_send(client, owner->vmid,
  640. qstrgetstr(&owner->uri), sem->lease,
  641. sem->semid, SYSV_SEM) :
  642. -ECONNREFUSED;
  643. SAVE_PROFILE_INTERVAL(sem_send_ipc_movres);
  644. goto out_locked;
  645. }
  646. unowned:
  /* The handle lock is released while the request is sent. Only -EAGAIN and
   * -ECONNREFUSED lead to re-checking ownership; any other result is final. */
  647. unlock(&hdl->lock);
  648. ret = ipc_sysv_semop_send(semid, sops, nsops, timeout, &seq);
  649. if (ret != -EAGAIN &&
  650. ret != -ECONNREFUSED)
  651. goto out;
  652. lock(&hdl->lock);
  653. SAVE_PROFILE_INTERVAL(sem_send_ipc_semop);
  654. if (!sem->owned)
  655. goto out_locked;
  656. }
  /* With a sequence number, look for a matching request on sem->migrated and
   * resume from its saved progress instead of starting over. */
  657. if (seq) {
  658. struct sem_ops * op;
  659. LISTP_FOR_EACH_ENTRY(op, &sem->migrated, progress)
  660. if (op->client.vmid == (client ? client->vmid : cur_process.vmid)
  661. && seq == op->client.seq) {
  662. LISTP_DEL_INIT(op, &sem->migrated, progress);
  663. sem_ops = op;
  664. stat = sem_ops->stat;
  665. malloced = true;
  666. break;
  667. }
  668. }
  /* Try to run the request to completion right away. */
  669. __handle_one_sysv_sem(sem, &stat, sops);
  670. SAVE_PROFILE_INTERVAL(sem_handle_one_sysv_sem);
  671. if (stat.completed || stat.failed) {
  672. ret = stat.completed ? 0 : -EAGAIN;
  673. if (client && sendreply) {
  674. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_resp));
  675. struct shim_ipc_msg* resp_msg = __alloca(total_msg_size);
  676. init_ipc_msg(resp_msg, IPC_RESP, total_msg_size, client->vmid);
  677. resp_msg->seq = client->seq;
  678. struct shim_ipc_resp* resp = (struct shim_ipc_resp *) resp_msg->msg;
  679. resp->retval = ret;
  680. ret = send_ipc_message(resp_msg, client->port);
  681. }
  682. SAVE_PROFILE_INTERVAL(sem_send_ipc_response);
  683. goto out_locked;
  684. }
  /* The request is blocked and must be queued. For a client the entry is heap
   * allocated (it is freed when __handle_sysv_sems() sends the result); for a
   * local caller it is taken from the stack via __alloca. */
  685. if (client) {
  686. assert(sendreply);
  687. if (!sem_ops || !malloced) {
  688. sem_ops = malloc(sizeof(struct sem_ops) +
  689. sizeof(struct sembuf) * nsops);
  690. if (!sem_ops) {
  691. ret = -ENOMEM;
  692. goto out_locked;
  693. }
  694. sem_ops->client.vmid = 0;
  695. sem_ops->client.port = NULL;
  696. sem_ops->client.seq = 0;
  697. INIT_LIST_HEAD(sem_ops, progress);
  698. malloced = true;
  699. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  700. }
  701. } else {
  702. if (!sem_ops) {
  703. sem_ops = __alloca(sizeof(struct sem_ops) +
  704. sizeof(struct sembuf) * nsops);
  705. sem_ops->client.vmid = 0;
  706. sem_ops->client.port = NULL;
  707. sem_ops->client.seq = 0;
  708. INIT_LIST_HEAD(sem_ops, progress);
  709. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  710. }
  711. }
  /* Record progress and the operations in the entry, then append it to the
   * next_ops list of the semaphore the request is currently blocked on. */
  712. sem_ops->stat = stat;
  713. for (int i = 0 ; i < nsops ; i++)
  714. sem_ops->ops[i] = sops[i];
  715. LISTP_TYPE(sem_ops) * next_ops =
  716. &sem->sems[sops[stat.current].sem_num].next_ops;
  717. assert(LIST_EMPTY(sem_ops, progress));
  718. LISTP_ADD_TAIL(sem_ops, next_ops, progress);
  719. //CHECK_LIST_HEAD(next_ops);
  720. sem->nreqs++;
  721. SAVE_PROFILE_INTERVAL(sem_append_semop);
  /* Client request: remember who to answer, take a port reference, and clear
   * the local pointer so the queued entry is not freed at `out`. */
  722. if (client) {
  723. assert(sendreply);
  724. add_ipc_port(client->port, client->vmid, IPC_PORT_SYSVCON, NULL);
  725. get_ipc_port(client->port);
  726. sem_ops->client = *client;
  727. sem_ops = NULL;
  728. goto out_locked;
  729. }
  /* Local request: wait on the event, with the lock released, until the entry
   * is marked completed or failed. If ownership is lost meanwhile, unlink the
   * entry and forward the request instead. */
  730. while (!sem_ops->stat.completed &&
  731. !sem_ops->stat.failed) {
  732. if (!sem->owned) {
  733. /* Chia-Che 10/17/17: sem_ops may move from semaphore to semaphore
  734. base on its current state */
  735. next_ops = &sem->sems[sem_ops->ops[sem_ops->stat.current].sem_num].next_ops;
  736. LISTP_DEL_INIT(sem_ops, next_ops, progress);
  737. goto unowned;
  738. }
  739. unlock(&hdl->lock);
  740. object_wait_with_retry(sem->event);
  741. lock(&hdl->lock);
  742. SAVE_PROFILE_INTERVAL(sem_wait_for_complete);
  743. }
  744. ret = sem_ops->stat.completed ? 0 : -EAGAIN;
  745. out_locked:
  746. unlock(&hdl->lock);
  747. out:
  /* Only heap-allocated entries still referenced here are freed. */
  748. if (sem_ops && malloced)
  749. free(sem_ops);
  750. return ret;
  751. }
  752. #if MIGRATE_SYSV_SEM == 1
  /*
   * Migration callback of sem_policy: hand ownership of the semaphore set in
   * `hdl` to the process described by `src`.
   *
   * The per-semaphore state and the pending requests are copied into backup
   * arrays and sent with ipc_sysv_semmov_send(). On success the set is marked
   * not owned, every queued request is failed (client requests are redirected
   * to the new owner and freed), the local sems[] array is released and
   * sem->event is set.
   *
   * Returns 0 on success or when no IPC info exists for src->vmid, otherwise
   * the negative result of ipc_sysv_semmov_send().
   * NOTE(review): the caller appears to hold hdl->lock - confirm.
   */
  753. static int sem_balance_migrate (struct shim_handle * hdl,
  754. struct sysv_client * src)
  755. {
  756. struct shim_sem_handle * sem = &hdl->info.sem;
  757. int ret = 0;
  758. debug("trigger semaphore balancing, migrate to process %u\n", src->vmid);
  /* Backup arrays are sized from nsems and nreqs and taken from the stack. */
  759. struct sem_backup * sem_backups = __alloca(sizeof(struct sem_backup) *
  760. sem->nsems);
  761. struct sem_client_backup * clients =
  762. __alloca(sizeof(struct sem_client_backup) * sem->nreqs);
  763. int sem_cnt = 0, client_cnt = 0;
  764. struct sem_obj * sobj;
  765. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  766. assert(sem_cnt < sem->nsems);
  767. struct sem_backup * b = sem_backups + (sem_cnt++);
  768. b->val = sobj->val;
  769. b->zcnt = sobj->zcnt;
  770. b->ncnt = sobj->ncnt;
  771. b->pid = sobj->pid;
  /* Fold next_ops into ops so a single walk sees every pending request. */
  772. LISTP_SPLICE_TAIL(&sobj->next_ops, &sobj->ops, progress, sem_ops);
  773. struct sem_ops * sops;
  774. LISTP_FOR_EACH_ENTRY(sops, &sobj->ops, progress) {
  775. assert(client_cnt < sem->nreqs);
  776. struct sem_client_backup * c = clients + (client_cnt)++;
  777. c->vmid = sops->client.vmid;
  778. c->seq = sops->client.seq;
  779. c->current = sops->stat.current;
  780. c->nops = sops->stat.nops;
  781. }
  782. }
  783. struct shim_ipc_info * info = lookup_ipc_info(src->vmid);
  784. if (!info)
  785. goto out;
  786. ipc_sysv_sublease_send(src->vmid, sem->semid,
  787. qstrgetstr(&info->uri),
  788. &sem->lease);
  789. ret = ipc_sysv_semmov_send(src->port, src->vmid, sem->semid, sem->lease,
  790. sem_backups, sem_cnt, clients, client_cnt,
  791. sem->scores, MAX_SYSV_CLIENTS);
  792. if (ret < 0)
  793. goto failed_info;
  /* The state was sent: give up ownership and keep `info` as the new owner. */
  794. sem->owned = false;
  795. sem->owner = info;
  796. for (sobj = sem->sems ; sobj < &sem->sems[sem->nsems] ; sobj++) {
  797. struct sem_ops * sops, * n;
  798. LISTP_FOR_EACH_ENTRY_SAFE(sops, n, &sobj->ops, progress) {
  799. LISTP_DEL_INIT(sops, &sobj->ops, progress);
  800. sem->nreqs--;
  801. sops->stat.failed = true;
  /* Requests without a remote client are only marked failed, not freed. */
  802. if (!sops->client.vmid)
  803. continue;
  804. ipc_sysv_movres_send(&sops->client, src->vmid,
  805. qstrgetstr(&info->uri), sem->lease,
  806. sem->semid, SYSV_SEM);
  807. put_ipc_port(sops->client.port);
  808. free(sops);
  809. }
  810. }
  811. sem->nsems = 0;
  812. free(sem->sems);
  813. sem->sems = NULL;
  814. ret = 0;
  815. DkEventSet(sem->event);
  816. goto out;
  817. failed_info:
  /* Reached only when the transfer failed: release the looked-up info. */
  818. put_ipc_info(info);
  819. out:
  820. return ret;
  821. }
  822. #endif