wordcopy.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. /* _memcopy.c -- subroutines for memory copy functions.
  2. Copyright (C) 1991, 1996 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
  18. #include <stddef.h>
  19. #include <sysdeps/generic/memcopy.h>
  20. /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
  21. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  22. Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
  /* DSTP and SRCP are addresses carried as long int and cast to a
     pointer to op_t at every access.  The copy runs from low to high
     addresses through an eight-way unrolled, software-pipelined loop:
     each step loads the next source word and stores the word loaded by
     the previous step, alternating between a0 and a1.  The switch on
     LEN % 8 preloads the first source word, biases SRCP and DSTP so
     that the fixed indices 0..7 inside the loop land on the right
     words, adjusts LEN to a multiple of 8 and jumps to the matching
     entry label.  The last word loaded is stored at do0, after the
     loop.  Returns nothing.  */
  23. void _wordcopy_fwd_aligned (long int dstp, long int srcp, int len)
  24. {
  25. op_t a0 = 0, a1 = 0; /* The two pipeline registers.  */
  26. switch (len % 8) /* Choose the loop entry from the leftover word count.  */
  27. {
  28. case 2:
  29. a0 = ((op_t *) srcp)[0];
  30. srcp -= 6 * OPSIZ; /* Index 7 at do1 now reads the second source word.  */
  31. dstp -= 7 * OPSIZ; /* Index 7 at do1 now writes the first destination word.  */
  32. len += 6; /* 2 + 6: the loop counter reaches zero exactly.  */
  33. goto do1;
  /* Cases 3 to 7 repeat the pattern of case 2 with one more word
     handled before the first full pass, hence an earlier entry label.  */
  34. case 3:
  35. a1 = ((op_t *) srcp)[0];
  36. srcp -= 5 * OPSIZ;
  37. dstp -= 6 * OPSIZ;
  38. len += 5;
  39. goto do2;
  40. case 4:
  41. a0 = ((op_t *) srcp)[0];
  42. srcp -= 4 * OPSIZ;
  43. dstp -= 5 * OPSIZ;
  44. len += 4;
  45. goto do3;
  46. case 5:
  47. a1 = ((op_t *) srcp)[0];
  48. srcp -= 3 * OPSIZ;
  49. dstp -= 4 * OPSIZ;
  50. len += 3;
  51. goto do4;
  52. case 6:
  53. a0 = ((op_t *) srcp)[0];
  54. srcp -= 2 * OPSIZ;
  55. dstp -= 3 * OPSIZ;
  56. len += 2;
  57. goto do5;
  58. case 7:
  59. a1 = ((op_t *) srcp)[0];
  60. srcp -= 1 * OPSIZ;
  61. dstp -= 2 * OPSIZ;
  62. len += 1;
  63. goto do6;
  64. case 0:
  /* Nothing to copy.  The early return is taken only when
     OP_T_THRES <= 3 * OPSIZ.  NOTE(review): OP_T_THRES is defined
     outside this file; presumably a larger threshold means callers
     never pass LEN == 0 -- confirm against memcopy.h.  */
  65. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  66. return;
  67. a0 = ((op_t *) srcp)[0];
  68. srcp -= 0 * OPSIZ; /* No bias needed; kept for symmetry with the other cases.  */
  69. dstp -= 1 * OPSIZ;
  70. goto do7; /* LEN is already a multiple of 8.  */
  71. case 1:
  72. a1 = ((op_t *) srcp)[0];
  73. srcp -=-1 * OPSIZ; /* Subtracting a negative: SRCP moves forward one word.  */
  74. dstp -= 0 * OPSIZ;
  75. len -= 1;
  /* Exactly one word in total: only the final store at do0 remains.
     As in case 0, this shortcut is taken only when
     OP_T_THRES <= 3 * OPSIZ.  */
  76. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  77. goto do0;
  78. goto do8; /* No-op. */
  79. }
  /* Main loop: eight load/store pairs per pass.  Each store writes the
     word loaded one step earlier, so a0 and a1 alternate roles.  */
  80. do
  81. {
  82. do8:
  83. a0 = ((op_t *) srcp)[0];
  84. ((op_t *) dstp)[0] = a1;
  85. do7:
  86. a1 = ((op_t *) srcp)[1];
  87. ((op_t *) dstp)[1] = a0;
  88. do6:
  89. a0 = ((op_t *) srcp)[2];
  90. ((op_t *) dstp)[2] = a1;
  91. do5:
  92. a1 = ((op_t *) srcp)[3];
  93. ((op_t *) dstp)[3] = a0;
  94. do4:
  95. a0 = ((op_t *) srcp)[4];
  96. ((op_t *) dstp)[4] = a1;
  97. do3:
  98. a1 = ((op_t *) srcp)[5];
  99. ((op_t *) dstp)[5] = a0;
  100. do2:
  101. a0 = ((op_t *) srcp)[6];
  102. ((op_t *) dstp)[6] = a1;
  103. do1:
  104. a1 = ((op_t *) srcp)[7];
  105. ((op_t *) dstp)[7] = a0;
  106. srcp += 8 * OPSIZ;
  107. dstp += 8 * OPSIZ;
  108. len -= 8;
  109. }
  110. while (len != 0); /* Terminates only because LEN was made a multiple of 8.  */
  111. /* This is the right position for do0. Please don't move
  112. it into the loop. */
  113. do0:
  114. ((op_t *) dstp)[0] = a1; /* Flush the pipeline: store the last word loaded.  */
  115. }
  116. /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
  117. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  118. DSTP should be aligned for memory operations on `op_t's, but SRCP must
  119. *not* be aligned. */
  /* DSTP and SRCP are addresses carried as long int and cast to a
     pointer to op_t at every access.  SH_1 is eight times the offset
     of SRCP within an OPSIZ-sized word and SH_2 is 8 * OPSIZ - SH_1;
     SRCP is then rounded down to a word boundary so that all loads are
     aligned.  Every destination word is produced by MERGE from two
     neighbouring aligned source words, the lower-addressed word given
     first.  NOTE(review): MERGE is defined outside this file,
     presumably as an endian-aware shift-and-or in memcopy.h -- confirm.
     The loop is unrolled four ways and rotates the loaded words through
     a0..a3 so that each source word is loaded only once.  The switch on
     LEN % 4 preloads two source words, biases SRCP, DSTP and LEN, and
     jumps to the matching entry label.  The last destination word is
     stored at do0, after the loop.  Returns nothing.  */
  120. void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, int len)
  121. {
  122. op_t a0 = 0, a1 = 0, a2 = 0, a3 = 0; /* Rotating window of source words.  */
  123. int sh_1, sh_2;
  124. /* Calculate how to shift a word read at the memory operation
  125. aligned srcp to make it aligned for copy. */
  126. sh_1 = 8 * (srcp % OPSIZ);
  127. sh_2 = 8 * OPSIZ - sh_1;
  128. /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
  129. it points in the middle of. */
  130. srcp &= -OPSIZ;
  131. switch (len % 4) /* Choose the loop entry from the leftover word count.  */
  132. {
  133. case 2:
  134. a1 = ((op_t *) srcp)[0];
  135. a2 = ((op_t *) srcp)[1];
  136. srcp -= 1 * OPSIZ; /* Index 3 at do1 now reads the third source word.  */
  137. dstp -= 3 * OPSIZ; /* Index 3 at do1 now writes the first destination word.  */
  138. len += 2; /* 2 + 2: the loop counter reaches zero exactly.  */
  139. goto do1;
  140. case 3:
  141. a0 = ((op_t *) srcp)[0];
  142. a1 = ((op_t *) srcp)[1];
  143. srcp -= 0 * OPSIZ;
  144. dstp -= 2 * OPSIZ;
  145. len += 1;
  146. goto do2;
  147. case 0:
  /* Nothing to copy.  The early return is taken only when
     OP_T_THRES <= 3 * OPSIZ.  NOTE(review): OP_T_THRES is defined
     outside this file; presumably a larger threshold means callers
     never pass LEN == 0 -- confirm against memcopy.h.  */
  148. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  149. return;
  150. a3 = ((op_t *) srcp)[0];
  151. a0 = ((op_t *) srcp)[1];
  152. srcp -=-1 * OPSIZ; /* Subtracting a negative: SRCP moves forward one word.  */
  153. dstp -= 1 * OPSIZ;
  154. len += 0; /* Already a multiple of 4; kept for symmetry.  */
  155. goto do3;
  156. case 1:
  157. a2 = ((op_t *) srcp)[0];
  158. a3 = ((op_t *) srcp)[1];
  159. srcp -=-2 * OPSIZ; /* SRCP moves forward two words.  */
  160. dstp -= 0 * OPSIZ;
  161. len -= 1;
  /* Exactly one word in total: both halves are already in a2 and a3,
     so only the final merge and store at do0 remains.  This shortcut
     is taken only when OP_T_THRES <= 3 * OPSIZ.  */
  162. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  163. goto do0;
  164. goto do4; /* No-op. */
  165. }
  /* Main loop: four load/merge/store steps per pass.  Each step loads
     one new source word and stores the merge of the two words loaded
     before it.  */
  166. do
  167. {
  168. do4:
  169. a0 = ((op_t *) srcp)[0];
  170. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
  171. do3:
  172. a1 = ((op_t *) srcp)[1];
  173. ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
  174. do2:
  175. a2 = ((op_t *) srcp)[2];
  176. ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
  177. do1:
  178. a3 = ((op_t *) srcp)[3];
  179. ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
  180. srcp += 4 * OPSIZ;
  181. dstp += 4 * OPSIZ;
  182. len -= 4;
  183. }
  184. while (len != 0); /* Terminates only because LEN was made a multiple of 4.  */
  185. /* This is the right position for do0. Please don't move
  186. it into the loop. */
  187. do0:
  188. ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); /* Last destination word.  */
  189. }
  190. /* _wordcopy_bwd_aligned -- Copy block finishing right before
  191. SRCP to block finishing right before DSTP with LEN `op_t' words
  192. (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
  193. operations on `op_t's. */
  /* DSTP and SRCP are addresses carried as long int and cast to a
     pointer to op_t at every access; both point just past the last
     word of their block.  The copy runs from high to low addresses
     through an eight-way unrolled, software-pipelined loop: each step
     loads the next lower source word and stores the word loaded by the
     previous step, alternating between a0 and a1.  The switch on
     LEN % 8 moves SRCP and DSTP down so that the fixed indices 7..0
     inside the loop land on the right words, preloads the last source
     word, adjusts LEN to a multiple of 8 and jumps to the matching
     entry label.  The lowest word is stored at do0, after the loop.
     Returns nothing.  */
  194. void _wordcopy_bwd_aligned (long int dstp, long int srcp, int len)
  195. {
  196. op_t a0 = 0, a1 = 0; /* The two pipeline registers.  */
  197. switch (len % 8) /* Choose the loop entry from the leftover word count.  */
  198. {
  199. case 2:
  200. srcp -= 2 * OPSIZ; /* Index 1 is now the last source word.  */
  201. dstp -= 1 * OPSIZ; /* Index 0 at do1 now writes the last destination word.  */
  202. a0 = ((op_t *) srcp)[1];
  203. len += 6; /* 2 + 6: the loop counter reaches zero exactly.  */
  204. goto do1;
  /* Cases 3 to 7 repeat the pattern of case 2 with one more word
     handled before the first full pass, hence an earlier entry label.  */
  205. case 3:
  206. srcp -= 3 * OPSIZ;
  207. dstp -= 2 * OPSIZ;
  208. a1 = ((op_t *) srcp)[2];
  209. len += 5;
  210. goto do2;
  211. case 4:
  212. srcp -= 4 * OPSIZ;
  213. dstp -= 3 * OPSIZ;
  214. a0 = ((op_t *) srcp)[3];
  215. len += 4;
  216. goto do3;
  217. case 5:
  218. srcp -= 5 * OPSIZ;
  219. dstp -= 4 * OPSIZ;
  220. a1 = ((op_t *) srcp)[4];
  221. len += 3;
  222. goto do4;
  223. case 6:
  224. srcp -= 6 * OPSIZ;
  225. dstp -= 5 * OPSIZ;
  226. a0 = ((op_t *) srcp)[5];
  227. len += 2;
  228. goto do5;
  229. case 7:
  230. srcp -= 7 * OPSIZ;
  231. dstp -= 6 * OPSIZ;
  232. a1 = ((op_t *) srcp)[6];
  233. len += 1;
  234. goto do6;
  235. case 0:
  /* Nothing to copy.  The early return is taken only when
     OP_T_THRES <= 3 * OPSIZ.  NOTE(review): OP_T_THRES is defined
     outside this file; presumably a larger threshold means callers
     never pass LEN == 0 -- confirm against memcopy.h.  */
  236. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  237. return;
  238. srcp -= 8 * OPSIZ;
  239. dstp -= 7 * OPSIZ;
  240. a0 = ((op_t *) srcp)[7];
  241. goto do7; /* LEN is already a multiple of 8.  */
  242. case 1:
  243. srcp -= 9 * OPSIZ;
  244. dstp -= 8 * OPSIZ; /* Index 7 is now the last destination word.  */
  245. a1 = ((op_t *) srcp)[8]; /* The last source word.  */
  246. len -= 1;
  /* Exactly one word in total: only the final store at do0 remains.
     As in case 0, this shortcut is taken only when
     OP_T_THRES <= 3 * OPSIZ.  */
  247. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  248. goto do0;
  249. goto do8; /* No-op. */
  250. }
  /* Main loop: eight load/store pairs per pass, walking indices 7 down
     to 0.  Each store writes the word loaded one step earlier.  */
  251. do
  252. {
  253. do8:
  254. a0 = ((op_t *) srcp)[7];
  255. ((op_t *) dstp)[7] = a1;
  256. do7:
  257. a1 = ((op_t *) srcp)[6];
  258. ((op_t *) dstp)[6] = a0;
  259. do6:
  260. a0 = ((op_t *) srcp)[5];
  261. ((op_t *) dstp)[5] = a1;
  262. do5:
  263. a1 = ((op_t *) srcp)[4];
  264. ((op_t *) dstp)[4] = a0;
  265. do4:
  266. a0 = ((op_t *) srcp)[3];
  267. ((op_t *) dstp)[3] = a1;
  268. do3:
  269. a1 = ((op_t *) srcp)[2];
  270. ((op_t *) dstp)[2] = a0;
  271. do2:
  272. a0 = ((op_t *) srcp)[1];
  273. ((op_t *) dstp)[1] = a1;
  274. do1:
  275. a1 = ((op_t *) srcp)[0];
  276. ((op_t *) dstp)[0] = a0;
  277. srcp -= 8 * OPSIZ;
  278. dstp -= 8 * OPSIZ;
  279. len -= 8;
  280. }
  281. while (len != 0); /* Terminates only because LEN was made a multiple of 8.  */
  282. /* This is the right position for do0. Please don't move
  283. it into the loop. */
  284. do0:
  285. ((op_t *) dstp)[7] = a1; /* DSTP was already moved down 8 words, so index 7 is the next lower slot.  */
  286. }
  287. /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
  288. before SRCP to block finishing right before DSTP with LEN `op_t'
  289. words (not LEN bytes!). DSTP should be aligned for memory
  290. operations on `op_t', but SRCP must *not* be aligned. */
  /* DSTP and SRCP are addresses carried as long int and cast to a
     pointer to op_t at every access; both point just past the end of
     their block.  SH_1 is eight times the offset of SRCP within an
     OPSIZ-sized word and SH_2 is 8 * OPSIZ - SH_1.  SRCP is rounded
     down to a word boundary and then advanced by one word, so that it
     points just past the aligned word holding the last source bytes.
     Every destination word is produced by MERGE from two neighbouring
     aligned source words, the lower-addressed word given first.
     NOTE(review): MERGE is defined outside this file, presumably as an
     endian-aware shift-and-or in memcopy.h -- confirm.  The copy runs
     from high to low addresses through a four-way unrolled loop that
     rotates the loaded words through a0..a3.  The switch on LEN % 4
     moves the pointers down, preloads the two highest source words,
     adjusts LEN to a multiple of 4 and jumps to the matching entry
     label.  The lowest destination word is stored at do0, after the
     loop.  Returns nothing.  */
  291. void _wordcopy_bwd_dest_aligned (long int dstp, long int srcp, int len)
  292. {
  293. op_t a0 = 0, a1 = 0, a2 = 0, a3 = 0; /* Rotating window of source words.  */
  294. int sh_1, sh_2;
  295. /* Calculate how to shift a word read at the memory operation
  296. aligned srcp to make it aligned for copy. */
  297. sh_1 = 8 * (srcp % OPSIZ);
  298. sh_2 = 8 * OPSIZ - sh_1;
  299. /* Make srcp aligned by rounding it down to the beginning of the op_t
  300. it points in the middle of. */
  301. srcp &= -OPSIZ;
  302. srcp += OPSIZ; /* Step past that word: SRCP is now an aligned end pointer.  */
  303. switch (len % 4) /* Choose the loop entry from the leftover word count.  */
  304. {
  305. case 2:
  306. srcp -= 3 * OPSIZ; /* Index 2 is now the highest source word needed.  */
  307. dstp -= 1 * OPSIZ; /* Index 0 at do1 now writes the last destination word.  */
  308. a2 = ((op_t *) srcp)[2];
  309. a1 = ((op_t *) srcp)[1];
  310. len += 2; /* 2 + 2: the loop counter reaches zero exactly.  */
  311. goto do1;
  312. case 3:
  313. srcp -= 4 * OPSIZ;
  314. dstp -= 2 * OPSIZ;
  315. a3 = ((op_t *) srcp)[3];
  316. a2 = ((op_t *) srcp)[2];
  317. len += 1;
  318. goto do2;
  319. case 0:
  /* Nothing to copy.  The early return is taken only when
     OP_T_THRES <= 3 * OPSIZ.  NOTE(review): OP_T_THRES is defined
     outside this file; presumably a larger threshold means callers
     never pass LEN == 0 -- confirm against memcopy.h.  */
  320. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  321. return;
  322. srcp -= 5 * OPSIZ;
  323. dstp -= 3 * OPSIZ;
  324. a0 = ((op_t *) srcp)[4];
  325. a3 = ((op_t *) srcp)[3];
  326. goto do3; /* LEN is already a multiple of 4.  */
  327. case 1:
  328. srcp -= 6 * OPSIZ;
  329. dstp -= 4 * OPSIZ; /* Index 3 is now the last destination word.  */
  330. a1 = ((op_t *) srcp)[5];
  331. a0 = ((op_t *) srcp)[4];
  332. len -= 1;
  /* Exactly one word in total: both halves are already in a0 and a1,
     so only the final merge and store at do0 remains.  This shortcut
     is taken only when OP_T_THRES <= 3 * OPSIZ.  */
  333. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  334. goto do0;
  335. goto do4; /* No-op. */
  336. }
  /* Main loop: four load/merge/store steps per pass, walking indices 3
     down to 0.  Each step loads one new, lower source word and stores
     the merge of the two words loaded before it.  */
  337. do
  338. {
  339. do4:
  340. a3 = ((op_t *) srcp)[3];
  341. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
  342. do3:
  343. a2 = ((op_t *) srcp)[2];
  344. ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
  345. do2:
  346. a1 = ((op_t *) srcp)[1];
  347. ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
  348. do1:
  349. a0 = ((op_t *) srcp)[0];
  350. ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
  351. srcp -= 4 * OPSIZ;
  352. dstp -= 4 * OPSIZ;
  353. len -= 4;
  354. }
  355. while (len != 0); /* Terminates only because LEN was made a multiple of 4.  */
  356. /* This is the right position for do0. Please don't move
  357. it into the loop. */
  358. do0:
  359. ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); /* DSTP was already moved down 4 words, so index 3 is the next lower slot.  */
  360. }