shim_semget.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. /* Copyright (C) 2014 Stony Brook University
  2. This file is part of Graphene Library OS.
  3. Graphene Library OS is free software: you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public License
  5. as published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. Graphene Library OS is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. /*
  14. * shim_semget.c
  15. *
  16. * Implementation of system call "semget", "semop", "semtimedop" and "semctl".
  17. */
  18. #include <errno.h>
  19. #include <list.h>
  20. #include <pal.h>
  21. #include <pal_error.h>
  22. #include <shim_handle.h>
  23. #include <shim_internal.h>
  24. #include <shim_ipc.h>
  25. #include <shim_profile.h>
  26. #include <shim_sysv.h>
  27. #include <shim_table.h>
  28. #include <shim_utils.h>
  /* Geometry of the semaphore lookup tables: SEM_HASH_NUM (1 << SEM_HASH_LEN)
   * buckets, selected by the low SEM_HASH_LEN bits of a key or semaphore ID. */
  29. #define SEM_HASH_LEN 8
  30. #define SEM_HASH_NUM (1 << SEM_HASH_LEN)
  31. #define SEM_HASH_MASK (SEM_HASH_NUM - 1)
  32. #define SEM_HASH(idx) ((idx) & SEM_HASH_MASK)
  33. /* The sem_list links shim_sem_handle objects by the list field.
  34. * The sem_key_hlist buckets link them by key_hlist, and the sem_sid_hlist
   * buckets link them by sid_hlist. All three are accessed under sem_list_lock
   * in this file. */
  35. DEFINE_LISTP(shim_sem_handle);
  36. static LISTP_TYPE(shim_sem_handle) sem_list;
  37. static LISTP_TYPE(shim_sem_handle) sem_key_hlist[SEM_HASH_NUM];
  38. static LISTP_TYPE(shim_sem_handle) sem_sid_hlist[SEM_HASH_NUM];
  /* Created lazily via create_lock_runtime() at the syscall entry points below. */
  39. static struct shim_lock sem_list_lock;
  40. DEFINE_PROFILE_CATEGORY(sysv_sem, );
  /* Map an embedded shim_sem_handle back to the shim_handle that contains it. */
  41. #define SEM_TO_HANDLE(semhdl) container_of((semhdl), struct shim_handle, info.sem)
  42. static int __add_sem_handle(unsigned long key, IDTYPE semid, int nsems, bool owned,
  43. struct shim_sem_handle** semhdl) {
  44. assert(locked(&sem_list_lock));
  45. LISTP_TYPE(shim_sem_handle)* key_head =
  46. (key != IPC_PRIVATE) ? &sem_key_hlist[SEM_HASH(key)] : NULL;
  47. LISTP_TYPE(shim_sem_handle)* sid_head = semid ? &sem_sid_hlist[SEM_HASH(semid)] : NULL;
  48. struct shim_sem_handle* tmp;
  49. struct shim_handle* hdl = NULL;
  50. int ret = 0;
  51. if (key_head)
  52. LISTP_FOR_EACH_ENTRY(tmp, key_head, key_hlist) {
  53. if (tmp->semkey == key) {
  54. if (tmp->semid == semid)
  55. goto out;
  56. return -EEXIST;
  57. }
  58. }
  59. if (sid_head)
  60. LISTP_FOR_EACH_ENTRY(tmp, sid_head, sid_hlist) {
  61. if (tmp->semid == semid) {
  62. if (key)
  63. tmp->semkey = key;
  64. goto out;
  65. }
  66. }
  67. hdl = get_new_handle();
  68. if (!hdl)
  69. return -ENOMEM;
  70. tmp = &hdl->info.sem;
  71. hdl->type = TYPE_SEM;
  72. tmp->semkey = key;
  73. tmp->semid = semid;
  74. tmp->owned = owned;
  75. tmp->event = DkNotificationEventCreate(PAL_FALSE);
  76. if (owned && nsems) {
  77. tmp->nsems = nsems;
  78. tmp->sems = malloc(sizeof(struct sem_obj) * nsems);
  79. if (!tmp->sems) {
  80. ret = -ENOMEM;
  81. goto failed;
  82. }
  83. for (int i = 0; i < nsems; i++) {
  84. tmp->sems[i].num = i;
  85. tmp->sems[i].val = 0;
  86. tmp->sems[i].host_sem_id = 0;
  87. tmp->sems[i].host_sem = NULL;
  88. INIT_LISTP(&tmp->sems[i].ops);
  89. INIT_LISTP(&tmp->sems[i].next_ops);
  90. }
  91. }
  92. INIT_LISTP(&tmp->migrated);
  93. INIT_LIST_HEAD(tmp, list);
  94. get_handle(hdl);
  95. LISTP_ADD_TAIL(tmp, &sem_list, list);
  96. INIT_LIST_HEAD(tmp, key_hlist);
  97. if (key_head) {
  98. get_handle(hdl);
  99. LISTP_ADD(tmp, key_head, key_hlist);
  100. }
  101. if (sid_head) {
  102. get_handle(hdl);
  103. LISTP_ADD(tmp, sid_head, sid_hlist);
  104. }
  105. out:
  106. if (!semhdl) {
  107. put_handle(hdl);
  108. return 0;
  109. }
  110. *semhdl = tmp;
  111. return 0;
  112. failed:
  113. put_handle(hdl);
  114. return ret;
  115. }
  116. int add_sem_handle(unsigned long key, IDTYPE id, int nsems, bool owned) {
  117. lock(&sem_list_lock);
  118. int ret = __add_sem_handle(key, id, nsems, owned, NULL);
  119. unlock(&sem_list_lock);
  120. return ret;
  121. }
  122. struct shim_sem_handle* get_sem_handle_by_key(unsigned long key) {
  123. LISTP_TYPE(shim_sem_handle)* key_head = &sem_key_hlist[SEM_HASH(key)];
  124. struct shim_sem_handle* tmp;
  125. struct shim_sem_handle* found = NULL;
  126. lock(&sem_list_lock);
  127. LISTP_FOR_EACH_ENTRY(tmp, key_head, key_hlist) {
  128. if (tmp->semkey == key) {
  129. found = tmp;
  130. break;
  131. }
  132. }
  133. if (found)
  134. get_handle(SEM_TO_HANDLE(found));
  135. unlock(&sem_list_lock);
  136. return found;
  137. }
  138. struct shim_sem_handle* get_sem_handle_by_id(IDTYPE semid) {
  139. LISTP_TYPE(shim_sem_handle)* sid_head = &sem_sid_hlist[SEM_HASH(semid)];
  140. struct shim_sem_handle* tmp;
  141. struct shim_sem_handle* found = NULL;
  142. lock(&sem_list_lock);
  143. LISTP_FOR_EACH_ENTRY(tmp, sid_head, sid_hlist) {
  144. if (tmp->semid == semid) {
  145. found = tmp;
  146. break;
  147. }
  148. }
  149. if (found)
  150. get_handle(SEM_TO_HANDLE(found));
  151. unlock(&sem_list_lock);
  152. return found;
  153. }
  /* Drop one reference on the shim_handle that embeds `sem`. */
  void put_sem_handle(struct shim_sem_handle* sem) {
      struct shim_handle* container = SEM_TO_HANDLE(sem);

      put_handle(container);
  }
  157. static int __del_sem_handle(struct shim_sem_handle* sem) {
  158. struct shim_handle* hdl = SEM_TO_HANDLE(sem);
  159. assert(locked(&hdl->lock));
  160. if (sem->deleted)
  161. return 0;
  162. sem->deleted = true;
  163. lock(&sem_list_lock);
  164. LISTP_DEL_INIT(sem, &sem_list, list);
  165. put_handle(hdl);
  166. if (!LIST_EMPTY(sem, key_hlist)) {
  167. // DEP: Yuck
  168. LISTP_TYPE(shim_sem_handle)* key_head = &sem_key_hlist[SEM_HASH(sem->semkey)];
  169. LISTP_DEL_INIT(sem, key_head, key_hlist);
  170. put_handle(hdl);
  171. }
  172. if (!LIST_EMPTY(sem, sid_hlist)) {
  173. // DEP: Yuck
  174. LISTP_TYPE(shim_sem_handle)* sid_head = &sem_sid_hlist[SEM_HASH(sem->semid)];
  175. LISTP_DEL_INIT(sem, sid_head, sid_hlist);
  176. put_handle(hdl);
  177. }
  178. unlock(&sem_list_lock);
  179. return 0;
  180. }
  181. int del_sem_handle(struct shim_sem_handle* sem) {
  182. struct shim_handle* hdl = SEM_TO_HANDLE(sem);
  183. lock(&hdl->lock);
  184. int ret = __del_sem_handle(sem);
  185. unlock(&hdl->lock);
  186. return ret;
  187. }
  188. int shim_do_semget(key_t key, int nsems, int semflg) {
  189. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  190. IDTYPE semid = 0;
  191. int ret;
  192. if (!create_lock_runtime(&sem_list_lock)) {
  193. return -ENOMEM;
  194. }
  195. if (key != IPC_PRIVATE) {
  196. struct shim_sem_handle* sem = get_sem_handle_by_key(key);
  197. if (sem) {
  198. semid = sem->semid;
  199. put_sem_handle(sem);
  200. return (semflg & IPC_EXCL) ? -EEXIST : (int)semid;
  201. }
  202. }
  203. struct sysv_key k;
  204. k.key = key;
  205. k.type = SYSV_SEM;
  206. if (semflg & IPC_CREAT) {
  207. do {
  208. semid = allocate_sysv(0, 0);
  209. if (!semid)
  210. semid = ipc_sysv_lease_send(NULL);
  211. } while (!semid);
  212. if (key != IPC_PRIVATE) {
  213. if ((ret = ipc_sysv_tellkey_send(NULL, 0, &k, semid, 0)) < 0) {
  214. release_sysv(semid);
  215. return ret;
  216. }
  217. }
  218. add_sem_handle(key, semid, nsems, true);
  219. } else {
  220. if ((ret = ipc_sysv_findkey_send(&k)) < 0)
  221. return ret;
  222. semid = ret;
  223. if ((ret = ipc_sysv_query_send(semid)) < 0)
  224. return ret;
  225. }
  226. return semid;
  227. }
  228. static int connect_sem_handle(int semid, int nsems, struct shim_sem_handle** semp) {
  229. struct shim_sem_handle* sem = get_sem_handle_by_id(semid);
  230. int ret;
  231. if (!sem) {
  232. if ((ret = ipc_sysv_query_send(semid)) < 0)
  233. return ret;
  234. if (!sem) {
  235. lock(&sem_list_lock);
  236. ret = __add_sem_handle(IPC_PRIVATE, semid, nsems, false, &sem);
  237. unlock(&sem_list_lock);
  238. if (ret < 0)
  239. return ret;
  240. }
  241. }
  242. *semp = sem;
  243. return 0;
  244. }
  245. int recover_sem_ownership(struct shim_sem_handle* sem, struct sem_backup* backups, int nbackups,
  246. struct sem_client_backup* clients, int nclients) {
  247. struct shim_handle* hdl = SEM_TO_HANDLE(sem);
  248. lock(&hdl->lock);
  249. assert(!sem->owned);
  250. assert(!sem->nsems && !sem->sems);
  251. sem->nsems = nbackups;
  252. if (!sem->sems && !(sem->sems = malloc(sizeof(struct sem_obj) * nbackups)))
  253. goto out;
  254. for (int i = 0; i < nbackups; i++) {
  255. sem->sems[i].num = i;
  256. sem->sems[i].val = backups[i].val;
  257. sem->sems[i].zcnt = backups[i].zcnt;
  258. sem->sems[i].ncnt = backups[i].ncnt;
  259. sem->sems[i].pid = backups[i].pid;
  260. INIT_LISTP(&sem->sems[i].ops);
  261. INIT_LISTP(&sem->sems[i].next_ops);
  262. }
  263. for (int i = 0; i < nclients; i++) {
  264. struct sem_ops* op = malloc(sizeof(struct sem_ops));
  265. if (!op)
  266. continue;
  267. op->stat.completed = false;
  268. op->stat.failed = false;
  269. op->stat.nops = clients[i].nops;
  270. op->stat.current = clients[i].current;
  271. op->stat.timeout = -1;
  272. op->client.vmid = clients[i].vmid;
  273. op->client.port = NULL;
  274. op->client.seq = clients[i].seq;
  275. INIT_LIST_HEAD(op, progress);
  276. LISTP_ADD_TAIL(op, &sem->migrated, progress);
  277. }
  278. sem->owned = true;
  279. DkEventSet(sem->event);
  280. out:
  281. unlock(&hdl->lock);
  282. return 0;
  283. }
  284. static int __do_semop(int semid, struct sembuf* sops, unsigned int nsops, unsigned long timeout) {
  285. int ret;
  286. struct shim_sem_handle* sem;
  287. size_t nsems = 0;
  288. for (size_t i = 0; i < nsops; i++)
  289. if (sops[i].sem_num >= nsems)
  290. nsems = sops[i].sem_num + 1;
  291. if (!create_lock_runtime(&sem_list_lock)) {
  292. return -ENOMEM;
  293. }
  294. if ((ret = connect_sem_handle(semid, nsems, &sem)) < 0)
  295. return ret;
  296. ret = submit_sysv_sem(sem, sops, nsops, timeout, NULL);
  297. put_sem_handle(sem);
  298. return ret;
  299. }
  300. int shim_do_semop(int semid, struct sembuf* sops, unsigned int nsops) {
  301. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  302. return __do_semop(semid, sops, nsops, IPC_SEM_NOTIMEOUT);
  303. }
  304. int shim_do_semtimedop(int semid, struct sembuf* sops, unsigned int nsops,
  305. const struct timespec* timeout) {
  306. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  307. return __do_semop(semid, sops, nsops, timeout->tv_sec * 1000000000ULL + timeout->tv_nsec);
  308. }
  309. int shim_do_semctl(int semid, int semnum, int cmd, unsigned long arg) {
  310. INC_PROFILE_OCCURENCE(syscall_use_ipc);
  311. struct shim_sem_handle* sem;
  312. int ret;
  313. if (!create_lock_runtime(&sem_list_lock)) {
  314. return -ENOMEM;
  315. }
  316. if ((ret = connect_sem_handle(semid, 0, &sem)) < 0)
  317. return ret;
  318. struct shim_handle* hdl = SEM_TO_HANDLE(sem);
  319. lock(&hdl->lock);
  320. switch (cmd) {
  321. case IPC_RMID: {
  322. if (!sem->owned) {
  323. ret = ipc_sysv_delres_send(NULL, 0, semid, SYSV_SEM);
  324. if (ret < 0)
  325. goto out;
  326. }
  327. __del_sem_handle(sem);
  328. goto out;
  329. }
  330. }
  331. if (sem->owned) {
  332. if (sem->deleted) {
  333. ret = -EIDRM;
  334. goto out;
  335. }
  336. switch (cmd) {
  337. case GETALL:
  338. for (int i = 0; i < sem->nsems; i++) {
  339. unsigned short val = sem->sems[i].val;
  340. ((unsigned short*)arg)[i] = val;
  341. }
  342. break;
  343. case GETNCNT:
  344. ret = sem->sems[semnum].ncnt;
  345. break;
  346. case GETPID:
  347. ret = sem->sems[semnum].pid;
  348. break;
  349. case GETVAL:
  350. ret = sem->sems[semnum].val;
  351. break;
  352. case GETZCNT:
  353. ret = sem->sems[semnum].zcnt;
  354. break;
  355. case SETALL:
  356. for (int i = 0; i < sem->nsems; i++) {
  357. unsigned short val = ((unsigned short*)arg)[i];
  358. sem->sems[i].val = val;
  359. }
  360. break;
  361. case SETVAL: {
  362. unsigned short val = arg;
  363. sem->sems[semnum].val = val;
  364. break;
  365. }
  366. }
  367. } else {
  368. switch (cmd) {
  369. case GETALL:
  370. case SETALL: {
  371. int valsize = sem->nsems * sizeof(unsigned short);
  372. ret = ipc_sysv_semctl_send(sem->semid, 0, cmd, (unsigned short*)arg, valsize);
  373. break;
  374. }
  375. case GETVAL:
  376. case GETNCNT:
  377. case GETPID:
  378. case GETZCNT: {
  379. int valsize = sizeof(unsigned short);
  380. unsigned short val;
  381. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd, &val, valsize);
  382. if (!ret)
  383. ret = val;
  384. break;
  385. }
  386. case SETVAL: {
  387. unsigned short val = arg;
  388. ret = ipc_sysv_semctl_send(sem->semid, semnum, cmd, &val, sizeof(unsigned short));
  389. break;
  390. }
  391. }
  392. }
  393. out:
  394. unlock(&hdl->lock);
  395. put_sem_handle(sem);
  396. return ret;
  397. }
  /*
   * Run one pass over all queued operation sequences of an owned semaphore set.
   *
   * Each semaphore has two queues: `ops` (scanned in this pass) and `next_ops`
   * (entries queued for a later pass). The pass first splices every next_ops
   * queue onto the matching ops queue, then tries to advance every queued
   * sequence from its stat.current position.
   *
   * Returns true if any sequence advanced, completed or failed during the pass;
   * __handle_one_sysv_sem() calls this repeatedly until it returns false.
   *
   * NOTE(review): the only caller in this file reaches this with the handle
   * lock held (via submit_sysv_sem) -- confirm for callers elsewhere.
   */
  398. static bool __handle_sysv_sems(struct shim_sem_handle* sem) {
  399. bool progressed = false;
  400. bool setevent = false;
  401. struct sem_obj* sobj;
  /* Phase 1: make the entries queued on next_ops visible to this pass. */
  402. for (sobj = sem->sems; sobj < &sem->sems[sem->nsems]; sobj++)
  403. LISTP_SPLICE_TAIL_INIT(&sobj->next_ops, &sobj->ops, progress, sem_ops);
  /* Phase 2: try to advance every sequence queued on each semaphore. */
  404. for (sobj = sem->sems; sobj < &sem->sems[sem->nsems]; sobj++) {
  405. struct sem_ops* sops;
  406. struct sem_ops* n;
  407. LISTP_FOR_EACH_ENTRY_SAFE(sops, n, &sobj->ops, progress) {
  408. struct sembuf* op = &sops->ops[sops->stat.current];
  409. assert(op->sem_num == sobj->num);
  410. // first_iter is a variable defined by LISTP_FOR_EACH_ENTRY_SAFE
  411. // The second part of this assertion is only valid after the first attempt
  412. assert(first_iter || (sops != n));
  413. if (sops->stat.completed)
  414. goto send_result;
  /* Apply the current operation; `continue` leaves the sequence queued on
   * this semaphore so that a later pass retries it. */
  415. again:
  416. if (op->sem_op > 0) {
  417. sobj->val += op->sem_op;
  418. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op, sobj->val);
  419. } else if (op->sem_op < 0) {
  /* Decrement: needs val >= -sem_op; otherwise fail (IPC_NOWAIT) or wait. */
  420. if (sobj->val < -op->sem_op) {
  421. if (op->sem_flg & IPC_NOWAIT) {
  422. debug("sem %u: wait for %u failed\n", sobj->num, -op->sem_op);
  423. goto failed;
  424. }
  425. continue;
  426. }
  427. sobj->val -= -op->sem_op;
  428. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op, sobj->val);
  429. } else {
  /* Wait-for-zero: proceeds only when the value is 0. */
  430. if (sobj->val) {
  431. if (op->sem_flg & IPC_NOWAIT) {
  432. debug("sem %u: wait for 0 failed\n", sobj->num);
  433. goto failed;
  434. }
  435. continue;
  436. }
  437. debug("sem %u: wait for 0\n", sobj->num);
  438. }
  /* The operation went through: step to the next one in the sequence. */
  439. progressed = true;
  440. sops->stat.current++;
  441. if (sops->stat.current == sops->stat.nops) {
  442. sops->stat.completed = true;
  443. goto send_result;
  444. }
  445. op = &sops->ops[sops->stat.current];
  /* If the next operation targets another semaphore, hand the sequence over
   * to that semaphore's next_ops queue; otherwise keep going right here. */
  446. if (op->sem_num != sobj->num) {
  447. LISTP_MOVE_TAIL(sops, &sem->sems[op->sem_num].next_ops, &sobj->ops, progress);
  448. continue;
  449. }
  450. goto again;
  451. failed:
  452. progressed = true;
  453. sops->stat.failed = true;
  454. send_result:
  455. /* Chia-Che 10/17/17: If the code reaches this point, sops should
  456. * still be in sobj->ops. */
  457. LISTP_DEL_INIT(sops, &sobj->ops, progress);
  458. sem->nreqs--;
  /* vmid == 0 marks a request submitted by this process: the entry is not
   * freed here, and the event is set once the pass is finished. */
  459. if (!sops->client.vmid) {
  460. setevent = true;
  461. continue;
  462. }
  /* Remote request: reply with 0 on completion or -EAGAIN on failure, then
   * release the port reference and the queue entry.
   * NOTE(review): the reply buffer is __alloca'ed once per finished request
   * inside the loops. Entries recreated by recover_sem_ownership() are put on
   * sem->migrated with client.port == NULL -- confirm such an entry cannot
   * reach this point. */
  463. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_resp));
  464. struct shim_ipc_msg* resp_msg = __alloca(total_msg_size);
  465. init_ipc_msg(resp_msg, IPC_RESP, total_msg_size, sops->client.vmid);
  466. resp_msg->seq = sops->client.seq;
  467. struct shim_ipc_resp* resp = (struct shim_ipc_resp*)resp_msg->msg;
  468. resp->retval = sops->stat.completed ? 0 : -EAGAIN;
  469. send_ipc_message(resp_msg, sops->client.port);
  470. put_ipc_port(sops->client.port);
  471. sops->client.vmid = 0;
  472. sops->client.port = NULL;
  473. sops->client.seq = 0;
  474. free(sops);
  475. }
  476. }
  477. if (setevent)
  478. DkEventSet(sem->event);
  479. return progressed;
  480. }
  481. static void __handle_one_sysv_sem(struct shim_sem_handle* sem, struct sem_stat* stat,
  482. struct sembuf* sops) {
  483. bool progressed = false;
  484. again:
  485. while (stat->current < stat->nops) {
  486. struct sem_obj* sobj = &sem->sems[sops[stat->current].sem_num];
  487. struct sembuf* op = &sops[stat->current];
  488. if (op->sem_op > 0) {
  489. progressed = true;
  490. sobj->val += op->sem_op;
  491. debug("sem %u: add %u => %u\n", sobj->num, op->sem_op, sobj->val);
  492. } else if (op->sem_op < 0) {
  493. if (sobj->val < -op->sem_op) {
  494. if (op->sem_flg & IPC_NOWAIT) {
  495. stat->failed = true;
  496. debug("sem %u: wait for %u failed\n", sobj->num, -op->sem_op);
  497. return;
  498. }
  499. goto failed;
  500. }
  501. progressed = true;
  502. sobj->val -= -op->sem_op;
  503. debug("sem %u: wait for %u => %u\n", sobj->num, -op->sem_op, sobj->val);
  504. } else {
  505. if (sobj->val) {
  506. if (op->sem_flg & IPC_NOWAIT) {
  507. stat->failed = true;
  508. debug("sem %u: wait for 0 failed\n", sobj->num);
  509. return;
  510. }
  511. goto failed;
  512. }
  513. progressed = true;
  514. debug("sem %u: wait for 0\n", sobj->num);
  515. }
  516. stat->current++;
  517. }
  518. stat->completed = true;
  519. failed:
  520. if (progressed) {
  521. while (__handle_sysv_sems(sem))
  522. ;
  523. progressed = false;
  524. if (!stat->completed)
  525. goto again;
  526. }
  527. }
  /* Ownership-migration policy, compiled in only when MIGRATE_SYSV_SEM == 1.
   * submit_sysv_sem() passes sem_policy to __balance_sysv_score(); the migrate
   * callback is sem_balance_migrate(), defined at the end of this file. */
  528. #if MIGRATE_SYSV_SEM == 1
  529. static int sem_balance_migrate(struct shim_handle* hdl, struct sysv_client* client);
  530. static struct sysv_balance_policy sem_policy = {
  531. .score_decay = SEM_SCORE_DECAY,
  532. .score_max = SEM_SCORE_MAX,
  533. .balance_threshold = SEM_BALANCE_THRESHOLD,
  534. .migrate = &sem_balance_migrate,
  535. };
  536. #endif
  /* Profiling intervals recorded by the SAVE_PROFILE_INTERVAL() calls in
   * submit_sysv_sem(), grouped under the submit_sysv_sem category. */
  537. DEFINE_PROFILE_CATEGORY(submit_sysv_sem, sysv_sem);
  538. DEFINE_PROFILE_INTERVAL(sem_prepare_stat, submit_sysv_sem);
  539. DEFINE_PROFILE_INTERVAL(sem_lock_handle, submit_sysv_sem);
  540. DEFINE_PROFILE_INTERVAL(sem_count_score, submit_sysv_sem);
  541. DEFINE_PROFILE_INTERVAL(sem_handle_by_shared_semaphore, submit_sysv_sem);
  542. DEFINE_PROFILE_INTERVAL(sem_send_ipc_movres, submit_sysv_sem);
  543. DEFINE_PROFILE_INTERVAL(sem_send_ipc_semop, submit_sysv_sem);
  544. DEFINE_PROFILE_INTERVAL(sem_handle_one_sysv_sem, submit_sysv_sem);
  545. DEFINE_PROFILE_INTERVAL(sem_send_ipc_response, submit_sysv_sem);
  546. DEFINE_PROFILE_INTERVAL(sem_alloc_semop, submit_sysv_sem);
  547. DEFINE_PROFILE_INTERVAL(sem_append_semop, submit_sysv_sem);
  548. DEFINE_PROFILE_INTERVAL(sem_wait_for_complete, submit_sysv_sem);
  /*
   * Submit a sequence of semaphore operations to a semaphore set.
   *
   * sem      the semaphore set (the caller keeps its reference).
   * sops     array of nsops operations.
   * timeout  stored in the request state and forwarded to the owner over IPC.
   * client   NULL when the request comes from this process; otherwise the
   *          remote process on whose behalf the request is executed.
   *
   * Outline:
   *   - If the set is not owned here, a remote request is redirected to the
   *     owner (movres) and a local one is forwarded with ipc_sysv_semop_send().
   *   - If the set is owned, the operations are first tried directly. When they
   *     cannot finish, the request is queued on the next_ops list of the
   *     semaphore it is blocked on: a remote request is left queued and answered
   *     later by __handle_sysv_sems(); a local request waits on sem->event until
   *     it is marked completed or failed.
   *
   * Returns 0 when all operations completed, -EAGAIN when the request failed,
   * -EIDRM when the set was deleted, -ENOMEM on allocation failure, or the
   * result of the IPC call that handled the request.
   *
   * NOTE(review): sem_num values in sops are used to index sem->sems[] here and
   * in __handle_one_sysv_sem() without a visible check against sem->nsems --
   * confirm that callers validate them.
   */
  549. int submit_sysv_sem(struct shim_sem_handle* sem, struct sembuf* sops, int nsops,
  550. unsigned long timeout, struct sysv_client* client) {
  551. BEGIN_PROFILE_INTERVAL();
  552. int ret = 0;
  553. struct shim_handle* hdl = SEM_TO_HANDLE(sem);
  554. struct sem_ops* sem_ops = NULL;
  /* `malloced` records whether sem_ops is heap memory that must be freed on
   * exit (as opposed to stack memory from __alloca). */
  555. bool malloced = false;
  556. struct sem_stat stat;
  557. stat.nops = nsops;
  558. stat.current = 0;
  559. stat.timeout = timeout;
  560. stat.completed = false;
  561. stat.failed = false;
  562. SAVE_PROFILE_INTERVAL(sem_prepare_stat);
  563. lock(&hdl->lock);
  564. SAVE_PROFILE_INTERVAL(sem_lock_handle);
  565. if (sem->deleted) {
  566. ret = -EIDRM;
  567. goto out_locked;
  568. }
  569. IDTYPE semid = sem->semid;
  /* sendreply: set when at least one operation may block (decrement or
   * wait-for-zero). */
  570. bool sendreply = false;
  571. unsigned long seq = client ? client->seq : 0;
  /* Score of this request; it is only consumed by __balance_sysv_score() in
   * the MIGRATE_SYSV_SEM build.
   * NOTE(review): the score macros are fed sem_num (the semaphore index) and
   * its negation rather than sem_op -- confirm this is intended. */
  572. int score = 0;
  573. for (int i = 0; i < nsops; i++) {
  574. struct sembuf* op = &sops[i];
  575. if (op->sem_op > 0) {
  576. score += SEM_POSITIVE_SCORE(op->sem_num);
  577. } else if (op->sem_op < 0) {
  578. score += SEM_NEGATIVE_SCORE(-op->sem_num);
  579. sendreply = true;
  580. } else {
  581. score += SEM_ZERO_SCORE;
  582. sendreply = true;
  583. }
  584. }
  585. SAVE_PROFILE_INTERVAL(sem_count_score);
  /* NOTE(review): sem->deleted was already checked above and the handle lock
   * has been held since, so this branch appears to be unreachable. */
  586. if (sem->deleted) {
  587. if (!client || sendreply) {
  588. ret = -EIDRM;
  589. goto out_locked;
  590. }
  591. ret = ipc_sysv_delres_send(client->port, client->vmid, sem->semid, SYSV_SEM);
  592. goto out_locked;
  593. }
  594. #if MIGRATE_SYSV_SEM == 1
  /* Account the request; if this handed ownership away (sem->owned cleared),
   * point a remote client at the new owner. */
  595. if (sem->owned) {
  596. __balance_sysv_score(&sem_policy, hdl, sem->scores, MAX_SYSV_CLIENTS, client, score);
  597. if (!sem->owned && client) {
  598. struct shim_ipc_info* owner = sem->owner;
  599. assert(owner);
  600. ret = ipc_sysv_movres_send(client, owner->vmid, qstrgetstr(&owner->uri), sem->lease,
  601. sem->semid, SYSV_SEM);
  602. goto out_locked;
  603. }
  604. }
  605. #endif
  606. if (!sem->owned) {
  /* Not the owner: redirect a remote client to the owner, or refuse if the
   * owner is unknown. */
  607. if (client) {
  608. struct shim_ipc_info* owner = sem->owner;
  609. ret = owner ? ipc_sysv_movres_send(client, owner->vmid, qstrgetstr(&owner->uri),
  610. sem->lease, sem->semid, SYSV_SEM)
  611. : -ECONNREFUSED;
  612. SAVE_PROFILE_INTERVAL(sem_send_ipc_movres);
  613. goto out_locked;
  614. }
  /* Local request on a set owned elsewhere: forward it with the handle lock
   * dropped. Also entered from the wait loop below when ownership is lost
   * while waiting. On -EAGAIN/-ECONNREFUSED the handle is re-examined and the
   * request is processed locally if this process now owns the set. */
  615. unowned:
  616. unlock(&hdl->lock);
  617. ret = ipc_sysv_semop_send(semid, sops, nsops, timeout, &seq);
  618. if (ret != -EAGAIN && ret != -ECONNREFUSED)
  619. goto out;
  620. lock(&hdl->lock);
  621. SAVE_PROFILE_INTERVAL(sem_send_ipc_semop);
  622. if (!sem->owned)
  623. goto out_locked;
  624. }
  /* A non-zero seq may identify a request that migrated here together with
   * the set: pick up its saved progress from sem->migrated. */
  625. if (seq) {
  626. struct sem_ops* op;
  627. LISTP_FOR_EACH_ENTRY(op, &sem->migrated, progress) {
  628. if (op->client.vmid == (client ? client->vmid : cur_process.vmid) &&
  629. seq == op->client.seq) {
  630. LISTP_DEL_INIT(op, &sem->migrated, progress);
  631. sem_ops = op;
  632. stat = sem_ops->stat;
  633. malloced = true;
  634. break;
  635. }
  636. }
  637. }
  /* Fast path: try to run the whole sequence right now. */
  638. __handle_one_sysv_sem(sem, &stat, sops);
  639. SAVE_PROFILE_INTERVAL(sem_handle_one_sysv_sem);
  640. if (stat.completed || stat.failed) {
  641. ret = stat.completed ? 0 : -EAGAIN;
  /* A remote client gets an explicit reply only if the request could have
   * blocked; ret then becomes the result of sending that reply. */
  642. if (client && sendreply) {
  643. size_t total_msg_size = get_ipc_msg_size(sizeof(struct shim_ipc_resp));
  644. struct shim_ipc_msg* resp_msg = __alloca(total_msg_size);
  645. init_ipc_msg(resp_msg, IPC_RESP, total_msg_size, client->vmid);
  646. resp_msg->seq = client->seq;
  647. struct shim_ipc_resp* resp = (struct shim_ipc_resp*)resp_msg->msg;
  648. resp->retval = ret;
  649. ret = send_ipc_message(resp_msg, client->port);
  650. }
  651. SAVE_PROFILE_INTERVAL(sem_send_ipc_response);
  652. goto out_locked;
  653. }
  /* Slow path: the request has to wait, so it needs a queue entry. Remote
   * requests outlive this call and are heap-allocated; local requests are
   * dequeued before this function returns and can use the stack. */
  654. if (client) {
  655. assert(sendreply);
  656. if (!sem_ops || !malloced) {
  657. sem_ops = malloc(sizeof(struct sem_ops) + sizeof(struct sembuf) * nsops);
  658. if (!sem_ops) {
  659. ret = -ENOMEM;
  660. goto out_locked;
  661. }
  662. sem_ops->client.vmid = 0;
  663. sem_ops->client.port = NULL;
  664. sem_ops->client.seq = 0;
  665. INIT_LIST_HEAD(sem_ops, progress);
  666. malloced = true;
  667. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  668. }
  669. } else {
  670. if (!sem_ops) {
  671. sem_ops = __alloca(sizeof(struct sem_ops) + sizeof(struct sembuf) * nsops);
  672. sem_ops->client.vmid = 0;
  673. sem_ops->client.port = NULL;
  674. sem_ops->client.seq = 0;
  675. INIT_LIST_HEAD(sem_ops, progress);
  676. SAVE_PROFILE_INTERVAL(sem_alloc_semop);
  677. }
  678. }
  /* Save the progress and a copy of the operations in the entry.
   * NOTE(review): when sem_ops was taken from sem->migrated, its ops[] array
   * must have room for nsops entries -- confirm against the allocation in
   * recover_sem_ownership(). */
  679. sem_ops->stat = stat;
  680. for (int i = 0; i < nsops; i++) {
  681. sem_ops->ops[i] = sops[i];
  682. }
  /* Queue the entry on the semaphore that the blocked operation refers to. */
  683. LISTP_TYPE(sem_ops)* next_ops = &sem->sems[sops[stat.current].sem_num].next_ops;
  684. assert(LIST_EMPTY(sem_ops, progress));
  685. LISTP_ADD_TAIL(sem_ops, next_ops, progress);
  686. // CHECK_LIST_HEAD(next_ops);
  687. sem->nreqs++;
  688. SAVE_PROFILE_INTERVAL(sem_append_semop);
  /* Remote request: record the client in the entry, take a port reference for
   * the later reply, and hand the entry over to the queue (sem_ops is cleared
   * so that it is not freed on exit). */
  689. if (client) {
  690. assert(sendreply);
  691. add_ipc_port(client->port, client->vmid, IPC_PORT_SYSVCON, NULL);
  692. get_ipc_port(client->port);
  693. sem_ops->client = *client;
  694. sem_ops = NULL;
  695. goto out_locked;
  696. }
  /* Local request: wait on the set's event until the entry is marked
   * completed or failed. The handle lock is dropped around each wait. */
  697. while (!sem_ops->stat.completed && !sem_ops->stat.failed) {
  698. if (!sem->owned) {
  699. /* Chia-Che 10/17/17: sem_ops may move from semaphore to semaphore
  700. based on its current state */
  701. next_ops = &sem->sems[sem_ops->ops[sem_ops->stat.current].sem_num].next_ops;
  702. LISTP_DEL_INIT(sem_ops, next_ops, progress);
  703. goto unowned;
  704. }
  705. unlock(&hdl->lock);
  706. object_wait_with_retry(sem->event);
  707. lock(&hdl->lock);
  708. SAVE_PROFILE_INTERVAL(sem_wait_for_complete);
  709. }
  710. ret = sem_ops->stat.completed ? 0 : -EAGAIN;
  711. out_locked:
  712. unlock(&hdl->lock);
  713. out:
  /* Only heap entries still owned by this call are freed here. */
  714. if (sem_ops && malloced)
  715. free(sem_ops);
  716. return ret;
  717. }
  #if MIGRATE_SYSV_SEM == 1
  /*
   * Migration callback of sem_policy: move ownership of the semaphore set in
   * `hdl` to the process described by `src`.
   *
   * A snapshot of every semaphore and of every queued request is sent to the
   * new owner. Once the transfer succeeded, the set is marked unowned, every
   * queued request is flagged failed (remote ones are redirected to the new
   * owner and freed), the local semaphore array is released and the event is
   * set. Returns 0 on success or when the target process cannot be looked up,
   * and the negative result of ipc_sysv_semmov_send() if the transfer failed.
   */
  static int sem_balance_migrate(struct shim_handle* hdl, struct sysv_client* src) {
      struct shim_sem_handle* sem = &hdl->info.sem;
      int ret = 0;

      debug("trigger semaphore balancing, migrate to process %u\n", src->vmid);

      struct sem_backup* sem_backups = __alloca(sizeof(struct sem_backup) * sem->nsems);
      struct sem_client_backup* clients = __alloca(sizeof(struct sem_client_backup) * sem->nreqs);
      int sem_cnt    = 0;
      int client_cnt = 0;

      /* Snapshot the per-semaphore state and all requests queued on it. */
      for (int idx = 0; idx < sem->nsems; idx++) {
          struct sem_obj* cur = &sem->sems[idx];

          assert(sem_cnt < sem->nsems);
          struct sem_backup* saved = &sem_backups[sem_cnt];
          sem_cnt++;

          saved->val  = cur->val;
          saved->zcnt = cur->zcnt;
          saved->ncnt = cur->ncnt;
          saved->pid  = cur->pid;

          LISTP_SPLICE_TAIL(&cur->next_ops, &cur->ops, progress, sem_ops);

          struct sem_ops* pending;
          LISTP_FOR_EACH_ENTRY(pending, &cur->ops, progress) {
              assert(client_cnt < sem->nreqs);
              struct sem_client_backup* slot = &clients[client_cnt];
              client_cnt++;

              slot->vmid    = pending->client.vmid;
              slot->seq     = pending->client.seq;
              slot->current = pending->stat.current;
              slot->nops    = pending->stat.nops;
          }
      }

      struct shim_ipc_info* info = lookup_ipc_info(src->vmid);
      if (!info)
          return ret;

      ipc_sysv_sublease_send(src->vmid, sem->semid, qstrgetstr(&info->uri), &sem->lease);

      ret = ipc_sysv_semmov_send(src->port, src->vmid, sem->semid, sem->lease, sem_backups, sem_cnt,
                                 clients, client_cnt, sem->scores, MAX_SYSV_CLIENTS);
      if (ret < 0) {
          put_ipc_info(info);
          return ret;
      }

      /* The transfer succeeded; `info` is kept as the recorded owner. */
      sem->owned = false;
      sem->owner = info;

      for (int idx = 0; idx < sem->nsems; idx++) {
          struct sem_obj* cur = &sem->sems[idx];
          struct sem_ops* pending;
          struct sem_ops* next;

          LISTP_FOR_EACH_ENTRY_SAFE(pending, next, &cur->ops, progress) {
              LISTP_DEL_INIT(pending, &cur->ops, progress);
              sem->nreqs--;
              pending->stat.failed = true;

              /* Requests from this process (vmid 0) are not freed here. */
              if (pending->client.vmid) {
                  ipc_sysv_movres_send(&pending->client, src->vmid, qstrgetstr(&info->uri),
                                       sem->lease, sem->semid, SYSV_SEM);
                  put_ipc_port(pending->client.port);
                  free(pending);
              }
          }
      }

      sem->nsems = 0;
      free(sem->sems);
      sem->sems = NULL;

      DkEventSet(sem->event);
      return 0;
  }
  #endif