wordcopy.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /* _memcopy.c -- subroutines for memory copy functions.
  2. Copyright (C) 1991, 1996 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
  18. #include <stddef.h>
  19. #include <sysdeps/generic/memcopy.h>
  20. /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
  21. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  22. Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
  23. void
  24. _wordcopy_fwd_aligned (dstp, srcp, len)
  25. long int dstp;
  26. long int srcp;
  27. size_t len;
  28. {
  29. op_t a0, a1;
  30. switch (len % 8)
  31. {
  32. case 2:
  33. a0 = ((op_t *) srcp)[0];
  34. srcp -= 6 * OPSIZ;
  35. dstp -= 7 * OPSIZ;
  36. len += 6;
  37. goto do1;
  38. case 3:
  39. a1 = ((op_t *) srcp)[0];
  40. srcp -= 5 * OPSIZ;
  41. dstp -= 6 * OPSIZ;
  42. len += 5;
  43. goto do2;
  44. case 4:
  45. a0 = ((op_t *) srcp)[0];
  46. srcp -= 4 * OPSIZ;
  47. dstp -= 5 * OPSIZ;
  48. len += 4;
  49. goto do3;
  50. case 5:
  51. a1 = ((op_t *) srcp)[0];
  52. srcp -= 3 * OPSIZ;
  53. dstp -= 4 * OPSIZ;
  54. len += 3;
  55. goto do4;
  56. case 6:
  57. a0 = ((op_t *) srcp)[0];
  58. srcp -= 2 * OPSIZ;
  59. dstp -= 3 * OPSIZ;
  60. len += 2;
  61. goto do5;
  62. case 7:
  63. a1 = ((op_t *) srcp)[0];
  64. srcp -= 1 * OPSIZ;
  65. dstp -= 2 * OPSIZ;
  66. len += 1;
  67. goto do6;
  68. case 0:
  69. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  70. return;
  71. a0 = ((op_t *) srcp)[0];
  72. srcp -= 0 * OPSIZ;
  73. dstp -= 1 * OPSIZ;
  74. goto do7;
  75. case 1:
  76. a1 = ((op_t *) srcp)[0];
  77. srcp -=-1 * OPSIZ;
  78. dstp -= 0 * OPSIZ;
  79. len -= 1;
  80. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  81. goto do0;
  82. goto do8; /* No-op. */
  83. }
  84. do
  85. {
  86. do8:
  87. a0 = ((op_t *) srcp)[0];
  88. ((op_t *) dstp)[0] = a1;
  89. do7:
  90. a1 = ((op_t *) srcp)[1];
  91. ((op_t *) dstp)[1] = a0;
  92. do6:
  93. a0 = ((op_t *) srcp)[2];
  94. ((op_t *) dstp)[2] = a1;
  95. do5:
  96. a1 = ((op_t *) srcp)[3];
  97. ((op_t *) dstp)[3] = a0;
  98. do4:
  99. a0 = ((op_t *) srcp)[4];
  100. ((op_t *) dstp)[4] = a1;
  101. do3:
  102. a1 = ((op_t *) srcp)[5];
  103. ((op_t *) dstp)[5] = a0;
  104. do2:
  105. a0 = ((op_t *) srcp)[6];
  106. ((op_t *) dstp)[6] = a1;
  107. do1:
  108. a1 = ((op_t *) srcp)[7];
  109. ((op_t *) dstp)[7] = a0;
  110. srcp += 8 * OPSIZ;
  111. dstp += 8 * OPSIZ;
  112. len -= 8;
  113. }
  114. while (len != 0);
  115. /* This is the right position for do0. Please don't move
  116. it into the loop. */
  117. do0:
  118. ((op_t *) dstp)[0] = a1;
  119. }
  120. /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
  121. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  122. DSTP should be aligned for memory operations on `op_t's, but SRCP must
  123. *not* be aligned. */
  124. void
  125. _wordcopy_fwd_dest_aligned (dstp, srcp, len)
  126. long int dstp;
  127. long int srcp;
  128. size_t len;
  129. {
  130. op_t a0, a1, a2, a3;
  131. int sh_1, sh_2;
  132. /* Calculate how to shift a word read at the memory operation
  133. aligned srcp to make it aligned for copy. */
  134. sh_1 = 8 * (srcp % OPSIZ);
  135. sh_2 = 8 * OPSIZ - sh_1;
  136. /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
  137. it points in the middle of. */
  138. srcp &= -OPSIZ;
  139. switch (len % 4)
  140. {
  141. case 2:
  142. a1 = ((op_t *) srcp)[0];
  143. a2 = ((op_t *) srcp)[1];
  144. srcp -= 1 * OPSIZ;
  145. dstp -= 3 * OPSIZ;
  146. len += 2;
  147. goto do1;
  148. case 3:
  149. a0 = ((op_t *) srcp)[0];
  150. a1 = ((op_t *) srcp)[1];
  151. srcp -= 0 * OPSIZ;
  152. dstp -= 2 * OPSIZ;
  153. len += 1;
  154. goto do2;
  155. case 0:
  156. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  157. return;
  158. a3 = ((op_t *) srcp)[0];
  159. a0 = ((op_t *) srcp)[1];
  160. srcp -=-1 * OPSIZ;
  161. dstp -= 1 * OPSIZ;
  162. len += 0;
  163. goto do3;
  164. case 1:
  165. a2 = ((op_t *) srcp)[0];
  166. a3 = ((op_t *) srcp)[1];
  167. srcp -=-2 * OPSIZ;
  168. dstp -= 0 * OPSIZ;
  169. len -= 1;
  170. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  171. goto do0;
  172. goto do4; /* No-op. */
  173. }
  174. do
  175. {
  176. do4:
  177. a0 = ((op_t *) srcp)[0];
  178. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  179. do3:
  180. a1 = ((op_t *) srcp)[1];
  181. ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
  182. do2:
  183. a2 = ((op_t *) srcp)[2];
  184. ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
  185. do1:
  186. a3 = ((op_t *) srcp)[3];
  187. ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
  188. srcp += 4 * OPSIZ;
  189. dstp += 4 * OPSIZ;
  190. len -= 4;
  191. }
  192. while (len != 0);
  193. /* This is the right position for do0. Please don't move
  194. it into the loop. */
  195. do0:
  196. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  197. }
  198. /* _wordcopy_bwd_aligned -- Copy block finishing right before
  199. SRCP to block finishing right before DSTP with LEN `op_t' words
  200. (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
  201. operations on `op_t's. */
  202. void
  203. _wordcopy_bwd_aligned (dstp, srcp, len)
  204. long int dstp;
  205. long int srcp;
  206. size_t len;
  207. {
  208. op_t a0, a1;
  209. switch (len % 8)
  210. {
  211. case 2:
  212. srcp -= 2 * OPSIZ;
  213. dstp -= 1 * OPSIZ;
  214. a0 = ((op_t *) srcp)[1];
  215. len += 6;
  216. goto do1;
  217. case 3:
  218. srcp -= 3 * OPSIZ;
  219. dstp -= 2 * OPSIZ;
  220. a1 = ((op_t *) srcp)[2];
  221. len += 5;
  222. goto do2;
  223. case 4:
  224. srcp -= 4 * OPSIZ;
  225. dstp -= 3 * OPSIZ;
  226. a0 = ((op_t *) srcp)[3];
  227. len += 4;
  228. goto do3;
  229. case 5:
  230. srcp -= 5 * OPSIZ;
  231. dstp -= 4 * OPSIZ;
  232. a1 = ((op_t *) srcp)[4];
  233. len += 3;
  234. goto do4;
  235. case 6:
  236. srcp -= 6 * OPSIZ;
  237. dstp -= 5 * OPSIZ;
  238. a0 = ((op_t *) srcp)[5];
  239. len += 2;
  240. goto do5;
  241. case 7:
  242. srcp -= 7 * OPSIZ;
  243. dstp -= 6 * OPSIZ;
  244. a1 = ((op_t *) srcp)[6];
  245. len += 1;
  246. goto do6;
  247. case 0:
  248. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  249. return;
  250. srcp -= 8 * OPSIZ;
  251. dstp -= 7 * OPSIZ;
  252. a0 = ((op_t *) srcp)[7];
  253. goto do7;
  254. case 1:
  255. srcp -= 9 * OPSIZ;
  256. dstp -= 8 * OPSIZ;
  257. a1 = ((op_t *) srcp)[8];
  258. len -= 1;
  259. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  260. goto do0;
  261. goto do8; /* No-op. */
  262. }
  263. do
  264. {
  265. do8:
  266. a0 = ((op_t *) srcp)[7];
  267. ((op_t *) dstp)[7] = a1;
  268. do7:
  269. a1 = ((op_t *) srcp)[6];
  270. ((op_t *) dstp)[6] = a0;
  271. do6:
  272. a0 = ((op_t *) srcp)[5];
  273. ((op_t *) dstp)[5] = a1;
  274. do5:
  275. a1 = ((op_t *) srcp)[4];
  276. ((op_t *) dstp)[4] = a0;
  277. do4:
  278. a0 = ((op_t *) srcp)[3];
  279. ((op_t *) dstp)[3] = a1;
  280. do3:
  281. a1 = ((op_t *) srcp)[2];
  282. ((op_t *) dstp)[2] = a0;
  283. do2:
  284. a0 = ((op_t *) srcp)[1];
  285. ((op_t *) dstp)[1] = a1;
  286. do1:
  287. a1 = ((op_t *) srcp)[0];
  288. ((op_t *) dstp)[0] = a0;
  289. srcp -= 8 * OPSIZ;
  290. dstp -= 8 * OPSIZ;
  291. len -= 8;
  292. }
  293. while (len != 0);
  294. /* This is the right position for do0. Please don't move
  295. it into the loop. */
  296. do0:
  297. ((op_t *) dstp)[7] = a1;
  298. }
  299. /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
  300. before SRCP to block finishing right before DSTP with LEN `op_t'
  301. words (not LEN bytes!). DSTP should be aligned for memory
  302. operations on `op_t', but SRCP must *not* be aligned. */
  303. void
  304. _wordcopy_bwd_dest_aligned (dstp, srcp, len)
  305. long int dstp;
  306. long int srcp;
  307. size_t len;
  308. {
  309. op_t a0, a1, a2, a3;
  310. int sh_1, sh_2;
  311. /* Calculate how to shift a word read at the memory operation
  312. aligned srcp to make it aligned for copy. */
  313. sh_1 = 8 * (srcp % OPSIZ);
  314. sh_2 = 8 * OPSIZ - sh_1;
  315. /* Make srcp aligned by rounding it down to the beginning of the op_t
  316. it points in the middle of. */
  317. srcp &= -OPSIZ;
  318. srcp += OPSIZ;
  319. switch (len % 4)
  320. {
  321. case 2:
  322. srcp -= 3 * OPSIZ;
  323. dstp -= 1 * OPSIZ;
  324. a2 = ((op_t *) srcp)[2];
  325. a1 = ((op_t *) srcp)[1];
  326. len += 2;
  327. goto do1;
  328. case 3:
  329. srcp -= 4 * OPSIZ;
  330. dstp -= 2 * OPSIZ;
  331. a3 = ((op_t *) srcp)[3];
  332. a2 = ((op_t *) srcp)[2];
  333. len += 1;
  334. goto do2;
  335. case 0:
  336. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  337. return;
  338. srcp -= 5 * OPSIZ;
  339. dstp -= 3 * OPSIZ;
  340. a0 = ((op_t *) srcp)[4];
  341. a3 = ((op_t *) srcp)[3];
  342. goto do3;
  343. case 1:
  344. srcp -= 6 * OPSIZ;
  345. dstp -= 4 * OPSIZ;
  346. a1 = ((op_t *) srcp)[5];
  347. a0 = ((op_t *) srcp)[4];
  348. len -= 1;
  349. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  350. goto do0;
  351. goto do4; /* No-op. */
  352. }
  353. do
  354. {
  355. do4:
  356. a3 = ((op_t *) srcp)[3];
  357. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
  358. do3:
  359. a2 = ((op_t *) srcp)[2];
  360. ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
  361. do2:
  362. a1 = ((op_t *) srcp)[1];
  363. ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
  364. do1:
  365. a0 = ((op_t *) srcp)[0];
  366. ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
  367. srcp -= 4 * OPSIZ;
  368. dstp -= 4 * OPSIZ;
  369. len -= 4;
  370. }
  371. while (len != 0);
  372. /* This is the right position for do0. Please don't move
  373. it into the loop. */
  374. do0:
  375. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
  376. }