wordcopy.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /* _memcopy.c -- subroutines for memory copy functions.
  2. Copyright (C) 1991, 1996 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
  18. #include <stddef.h>
  19. #include <sysdeps/generic/memcopy.h>
  20. /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
  21. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  22. Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
  23. void _wordcopy_fwd_aligned(long int dstp, long int srcp, int len) {
  24. op_t a0 = 0, a1 = 0;
  25. switch (len % 8) {
  26. case 2:
  27. a0 = ((op_t*)srcp)[0];
  28. srcp -= 6 * OPSIZ;
  29. dstp -= 7 * OPSIZ;
  30. len += 6;
  31. goto do1;
  32. case 3:
  33. a1 = ((op_t*)srcp)[0];
  34. srcp -= 5 * OPSIZ;
  35. dstp -= 6 * OPSIZ;
  36. len += 5;
  37. goto do2;
  38. case 4:
  39. a0 = ((op_t*)srcp)[0];
  40. srcp -= 4 * OPSIZ;
  41. dstp -= 5 * OPSIZ;
  42. len += 4;
  43. goto do3;
  44. case 5:
  45. a1 = ((op_t*)srcp)[0];
  46. srcp -= 3 * OPSIZ;
  47. dstp -= 4 * OPSIZ;
  48. len += 3;
  49. goto do4;
  50. case 6:
  51. a0 = ((op_t*)srcp)[0];
  52. srcp -= 2 * OPSIZ;
  53. dstp -= 3 * OPSIZ;
  54. len += 2;
  55. goto do5;
  56. case 7:
  57. a1 = ((op_t*)srcp)[0];
  58. srcp -= 1 * OPSIZ;
  59. dstp -= 2 * OPSIZ;
  60. len += 1;
  61. goto do6;
  62. case 0:
  63. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  64. return;
  65. a0 = ((op_t*)srcp)[0];
  66. srcp -= 0 * OPSIZ;
  67. dstp -= 1 * OPSIZ;
  68. goto do7;
  69. case 1:
  70. a1 = ((op_t*)srcp)[0];
  71. srcp -= -1 * OPSIZ;
  72. dstp -= 0 * OPSIZ;
  73. len -= 1;
  74. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  75. goto do0;
  76. goto do8; /* No-op. */
  77. }
  78. do {
  79. do8:
  80. a0 = ((op_t*)srcp)[0];
  81. ((op_t*)dstp)[0] = a1;
  82. do7:
  83. a1 = ((op_t*)srcp)[1];
  84. ((op_t*)dstp)[1] = a0;
  85. do6:
  86. a0 = ((op_t*)srcp)[2];
  87. ((op_t*)dstp)[2] = a1;
  88. do5:
  89. a1 = ((op_t*)srcp)[3];
  90. ((op_t*)dstp)[3] = a0;
  91. do4:
  92. a0 = ((op_t*)srcp)[4];
  93. ((op_t*)dstp)[4] = a1;
  94. do3:
  95. a1 = ((op_t*)srcp)[5];
  96. ((op_t*)dstp)[5] = a0;
  97. do2:
  98. a0 = ((op_t*)srcp)[6];
  99. ((op_t*)dstp)[6] = a1;
  100. do1:
  101. a1 = ((op_t*)srcp)[7];
  102. ((op_t*)dstp)[7] = a0;
  103. srcp += 8 * OPSIZ;
  104. dstp += 8 * OPSIZ;
  105. len -= 8;
  106. } while (len != 0);
  107. /* This is the right position for do0. Please don't move
  108. it into the loop. */
  109. do0:
  110. ((op_t*)dstp)[0] = a1;
  111. }
  112. /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
  113. block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
  114. DSTP should be aligned for memory operations on `op_t's, but SRCP must
  115. *not* be aligned. */
  116. void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp, int len) {
  117. op_t a0 = 0, a1 = 0, a2 = 0, a3 = 0;
  118. int sh_1, sh_2;
  119. /* Calculate how to shift a word read at the memory operation
  120. aligned srcp to make it aligned for copy. */
  121. sh_1 = 8 * (srcp % OPSIZ);
  122. sh_2 = 8 * OPSIZ - sh_1;
  123. /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
  124. it points in the middle of. */
  125. srcp &= -OPSIZ;
  126. switch (len % 4) {
  127. case 2:
  128. a1 = ((op_t*)srcp)[0];
  129. a2 = ((op_t*)srcp)[1];
  130. srcp -= 1 * OPSIZ;
  131. dstp -= 3 * OPSIZ;
  132. len += 2;
  133. goto do1;
  134. case 3:
  135. a0 = ((op_t*)srcp)[0];
  136. a1 = ((op_t*)srcp)[1];
  137. srcp -= 0 * OPSIZ;
  138. dstp -= 2 * OPSIZ;
  139. len += 1;
  140. goto do2;
  141. case 0:
  142. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  143. return;
  144. a3 = ((op_t*)srcp)[0];
  145. a0 = ((op_t*)srcp)[1];
  146. srcp -= -1 * OPSIZ;
  147. dstp -= 1 * OPSIZ;
  148. len += 0;
  149. goto do3;
  150. case 1:
  151. a2 = ((op_t*)srcp)[0];
  152. a3 = ((op_t*)srcp)[1];
  153. srcp -= -2 * OPSIZ;
  154. dstp -= 0 * OPSIZ;
  155. len -= 1;
  156. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  157. goto do0;
  158. goto do4; /* No-op. */
  159. }
  160. do {
  161. do4:
  162. a0 = ((op_t*)srcp)[0];
  163. ((op_t*)dstp)[0] = MERGE(a2, sh_1, a3, sh_2);
  164. do3:
  165. a1 = ((op_t*)srcp)[1];
  166. ((op_t*)dstp)[1] = MERGE(a3, sh_1, a0, sh_2);
  167. do2:
  168. a2 = ((op_t*)srcp)[2];
  169. ((op_t*)dstp)[2] = MERGE(a0, sh_1, a1, sh_2);
  170. do1:
  171. a3 = ((op_t*)srcp)[3];
  172. ((op_t*)dstp)[3] = MERGE(a1, sh_1, a2, sh_2);
  173. srcp += 4 * OPSIZ;
  174. dstp += 4 * OPSIZ;
  175. len -= 4;
  176. } while (len != 0);
  177. /* This is the right position for do0. Please don't move
  178. it into the loop. */
  179. do0:
  180. ((op_t*)dstp)[0] = MERGE(a2, sh_1, a3, sh_2);
  181. }
  182. /* _wordcopy_bwd_aligned -- Copy block finishing right before
  183. SRCP to block finishing right before DSTP with LEN `op_t' words
  184. (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
  185. operations on `op_t's. */
  186. void _wordcopy_bwd_aligned(long int dstp, long int srcp, int len) {
  187. op_t a0 = 0, a1 = 0;
  188. switch (len % 8) {
  189. case 2:
  190. srcp -= 2 * OPSIZ;
  191. dstp -= 1 * OPSIZ;
  192. a0 = ((op_t*)srcp)[1];
  193. len += 6;
  194. goto do1;
  195. case 3:
  196. srcp -= 3 * OPSIZ;
  197. dstp -= 2 * OPSIZ;
  198. a1 = ((op_t*)srcp)[2];
  199. len += 5;
  200. goto do2;
  201. case 4:
  202. srcp -= 4 * OPSIZ;
  203. dstp -= 3 * OPSIZ;
  204. a0 = ((op_t*)srcp)[3];
  205. len += 4;
  206. goto do3;
  207. case 5:
  208. srcp -= 5 * OPSIZ;
  209. dstp -= 4 * OPSIZ;
  210. a1 = ((op_t*)srcp)[4];
  211. len += 3;
  212. goto do4;
  213. case 6:
  214. srcp -= 6 * OPSIZ;
  215. dstp -= 5 * OPSIZ;
  216. a0 = ((op_t*)srcp)[5];
  217. len += 2;
  218. goto do5;
  219. case 7:
  220. srcp -= 7 * OPSIZ;
  221. dstp -= 6 * OPSIZ;
  222. a1 = ((op_t*)srcp)[6];
  223. len += 1;
  224. goto do6;
  225. case 0:
  226. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  227. return;
  228. srcp -= 8 * OPSIZ;
  229. dstp -= 7 * OPSIZ;
  230. a0 = ((op_t*)srcp)[7];
  231. goto do7;
  232. case 1:
  233. srcp -= 9 * OPSIZ;
  234. dstp -= 8 * OPSIZ;
  235. a1 = ((op_t*)srcp)[8];
  236. len -= 1;
  237. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  238. goto do0;
  239. goto do8; /* No-op. */
  240. }
  241. do {
  242. do8:
  243. a0 = ((op_t*)srcp)[7];
  244. ((op_t*)dstp)[7] = a1;
  245. do7:
  246. a1 = ((op_t*)srcp)[6];
  247. ((op_t*)dstp)[6] = a0;
  248. do6:
  249. a0 = ((op_t*)srcp)[5];
  250. ((op_t*)dstp)[5] = a1;
  251. do5:
  252. a1 = ((op_t*)srcp)[4];
  253. ((op_t*)dstp)[4] = a0;
  254. do4:
  255. a0 = ((op_t*)srcp)[3];
  256. ((op_t*)dstp)[3] = a1;
  257. do3:
  258. a1 = ((op_t*)srcp)[2];
  259. ((op_t*)dstp)[2] = a0;
  260. do2:
  261. a0 = ((op_t*)srcp)[1];
  262. ((op_t*)dstp)[1] = a1;
  263. do1:
  264. a1 = ((op_t*)srcp)[0];
  265. ((op_t*)dstp)[0] = a0;
  266. srcp -= 8 * OPSIZ;
  267. dstp -= 8 * OPSIZ;
  268. len -= 8;
  269. } while (len != 0);
  270. /* This is the right position for do0. Please don't move
  271. it into the loop. */
  272. do0:
  273. ((op_t*)dstp)[7] = a1;
  274. }
  275. /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
  276. before SRCP to block finishing right before DSTP with LEN `op_t'
  277. words (not LEN bytes!). DSTP should be aligned for memory
  278. operations on `op_t', but SRCP must *not* be aligned. */
  279. void _wordcopy_bwd_dest_aligned(long int dstp, long int srcp, int len) {
  280. op_t a0 = 0, a1 = 0, a2 = 0, a3 = 0;
  281. int sh_1, sh_2;
  282. /* Calculate how to shift a word read at the memory operation
  283. aligned srcp to make it aligned for copy. */
  284. sh_1 = 8 * (srcp % OPSIZ);
  285. sh_2 = 8 * OPSIZ - sh_1;
  286. /* Make srcp aligned by rounding it down to the beginning of the op_t
  287. it points in the middle of. */
  288. srcp &= -OPSIZ;
  289. srcp += OPSIZ;
  290. switch (len % 4) {
  291. case 2:
  292. srcp -= 3 * OPSIZ;
  293. dstp -= 1 * OPSIZ;
  294. a2 = ((op_t*)srcp)[2];
  295. a1 = ((op_t*)srcp)[1];
  296. len += 2;
  297. goto do1;
  298. case 3:
  299. srcp -= 4 * OPSIZ;
  300. dstp -= 2 * OPSIZ;
  301. a3 = ((op_t*)srcp)[3];
  302. a2 = ((op_t*)srcp)[2];
  303. len += 1;
  304. goto do2;
  305. case 0:
  306. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  307. return;
  308. srcp -= 5 * OPSIZ;
  309. dstp -= 3 * OPSIZ;
  310. a0 = ((op_t*)srcp)[4];
  311. a3 = ((op_t*)srcp)[3];
  312. goto do3;
  313. case 1:
  314. srcp -= 6 * OPSIZ;
  315. dstp -= 4 * OPSIZ;
  316. a1 = ((op_t*)srcp)[5];
  317. a0 = ((op_t*)srcp)[4];
  318. len -= 1;
  319. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  320. goto do0;
  321. goto do4; /* No-op. */
  322. }
  323. do {
  324. do4:
  325. a3 = ((op_t*)srcp)[3];
  326. ((op_t*)dstp)[3] = MERGE(a0, sh_1, a1, sh_2);
  327. do3:
  328. a2 = ((op_t*)srcp)[2];
  329. ((op_t*)dstp)[2] = MERGE(a3, sh_1, a0, sh_2);
  330. do2:
  331. a1 = ((op_t*)srcp)[1];
  332. ((op_t*)dstp)[1] = MERGE(a2, sh_1, a3, sh_2);
  333. do1:
  334. a0 = ((op_t*)srcp)[0];
  335. ((op_t*)dstp)[0] = MERGE(a1, sh_1, a2, sh_2);
  336. srcp -= 4 * OPSIZ;
  337. dstp -= 4 * OPSIZ;
  338. len -= 4;
  339. } while (len != 0);
  340. /* This is the right position for do0. Please don't move
  341. it into the loop. */
  342. do0:
  343. ((op_t*)dstp)[3] = MERGE(a0, sh_1, a1, sh_2);
  344. }