memcmp.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. /* Copyright (C) 1991,1993,1995,1997,1998,2003,2004
  2. Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. #include "api.h"
  18. #undef __ptr_t
  19. #if defined __cplusplus || (defined __STDC__ && __STDC__)
  20. # define __ptr_t void *
  21. #else /* Not C++ or ANSI C. */
  22. # undef const
  23. # define const
  24. # define __ptr_t char *
  25. #endif /* C++ or ANSI C. */
  26. # include <sysdeps/generic/memcopy.h>
  27. # include <endian.h>
  28. # if __BYTE_ORDER == __BIG_ENDIAN
  29. # define WORDS_BIGENDIAN
  30. # endif
  /* CMP_LT_OR_GT (a, b) -- Order two words that are already known to be
     unequal (every use in this file is guarded by a `!=' test).
     When WORDS_BIGENDIAN is defined the words are compared numerically and
     the result is 1 or -1; otherwise the decision is delegated to
     memcmp_bytes, which inspects the words byte by byte.  */
  #if defined WORDS_BIGENDIAN
  # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
  #else
  # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
  #endif
  36. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
  37. /* The strategy of this memcmp is:
  38. 1. Compare bytes until one of the block pointers is aligned.
  39. 2. Compare using memcmp_common_alignment or
  40. memcmp_not_common_alignment, regarding the alignment of the other
  41. block after the initial byte operations. The maximum number of
  42. full words (of type op_t) are compared in this way.
  43. 3. Compare the few remaining bytes. */
  44. #ifndef WORDS_BIGENDIAN
  45. /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
  46. A and B are known to be different.
  47. This is needed only on little-endian machines. */
  48. static int memcmp_bytes (op_t a, op_t b)
  49. {
  50. long int srcp1 = (long int) &a;
  51. long int srcp2 = (long int) &b;
  52. op_t a0, b0;
  53. do {
  54. a0 = ((byte *) srcp1)[0];
  55. b0 = ((byte *) srcp2)[0];
  56. srcp1 += 1;
  57. srcp2 += 1;
  58. } while (a0 == b0);
  59. return a0 - b0;
  60. }
  61. #endif
  62. /* (memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
  63. objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
  64. memory operations on `op_t's. */
  65. static int memcmp_common_alignment (long srcp1, long srcp2, size_t len)
  66. {
  67. op_t a0, a1;
  68. op_t b0, b1;
  69. switch (len % 4) {
  70. default: /* Avoid warning about uninitialized local variables. */
  71. case 2:
  72. a0 = ((op_t *) srcp1)[0];
  73. b0 = ((op_t *) srcp2)[0];
  74. srcp1 -= 2 * OPSIZ;
  75. srcp2 -= 2 * OPSIZ;
  76. len += 2;
  77. goto do1;
  78. case 3:
  79. a1 = ((op_t *) srcp1)[0];
  80. b1 = ((op_t *) srcp2)[0];
  81. srcp1 -= OPSIZ;
  82. srcp2 -= OPSIZ;
  83. len += 1;
  84. goto do2;
  85. case 0:
  86. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  87. return 0;
  88. a0 = ((op_t *) srcp1)[0];
  89. b0 = ((op_t *) srcp2)[0];
  90. goto do3;
  91. case 1:
  92. a1 = ((op_t *) srcp1)[0];
  93. b1 = ((op_t *) srcp2)[0];
  94. srcp1 += OPSIZ;
  95. srcp2 += OPSIZ;
  96. len -= 1;
  97. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  98. goto do0;
  99. /* Fall through. */
  100. }
  101. do {
  102. a0 = ((op_t *) srcp1)[0];
  103. b0 = ((op_t *) srcp2)[0];
  104. if (a1 != b1)
  105. return CMP_LT_OR_GT (a1, b1);
  106. do3:
  107. a1 = ((op_t *) srcp1)[1];
  108. b1 = ((op_t *) srcp2)[1];
  109. if (a0 != b0)
  110. return CMP_LT_OR_GT (a0, b0);
  111. do2:
  112. a0 = ((op_t *) srcp1)[2];
  113. b0 = ((op_t *) srcp2)[2];
  114. if (a1 != b1)
  115. return CMP_LT_OR_GT (a1, b1);
  116. do1:
  117. a1 = ((op_t *) srcp1)[3];
  118. b1 = ((op_t *) srcp2)[3];
  119. if (a0 != b0)
  120. return CMP_LT_OR_GT (a0, b0);
  121. srcp1 += 4 * OPSIZ;
  122. srcp2 += 4 * OPSIZ;
  123. len -= 4;
  124. } while (len != 0);
  125. /* This is the right position for do0. Please don't move
  126. it into the loop. */
  127. do0:
  128. if (a1 != b1)
  129. return CMP_LT_OR_GT (a1, b1);
  130. return 0;
  131. }
  132. /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
  133. `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
  134. operations on `op_t', but SRCP1 *should be unaligned*. */
  135. static int memcmp_not_common_alignment (long srcp1, long srcp2, size_t len)
  136. {
  137. op_t a0, a1, a2, a3;
  138. op_t b0, b1, b2, b3;
  139. op_t x;
  140. int shl, shr;
  141. /* Calculate how to shift a word read at the memory operation
  142. aligned srcp1 to make it aligned for comparison. */
  143. shl = 8 * (srcp1 % OPSIZ);
  144. shr = 8 * OPSIZ - shl;
  145. /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
  146. it points in the middle of. */
  147. srcp1 &= -OPSIZ;
  148. switch (len % 4) {
  149. default: /* Avoid warning about uninitialized local variables. */
  150. case 2:
  151. a1 = ((op_t *) srcp1)[0];
  152. a2 = ((op_t *) srcp1)[1];
  153. b2 = ((op_t *) srcp2)[0];
  154. srcp1 -= 1 * OPSIZ;
  155. srcp2 -= 2 * OPSIZ;
  156. len += 2;
  157. goto do1;
  158. case 3:
  159. a0 = ((op_t *) srcp1)[0];
  160. a1 = ((op_t *) srcp1)[1];
  161. b1 = ((op_t *) srcp2)[0];
  162. srcp2 -= 1 * OPSIZ;
  163. len += 1;
  164. goto do2;
  165. case 0:
  166. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  167. return 0;
  168. a3 = ((op_t *) srcp1)[0];
  169. a0 = ((op_t *) srcp1)[1];
  170. b0 = ((op_t *) srcp2)[0];
  171. srcp1 += 1 * OPSIZ;
  172. goto do3;
  173. case 1:
  174. a2 = ((op_t *) srcp1)[0];
  175. a3 = ((op_t *) srcp1)[1];
  176. b3 = ((op_t *) srcp2)[0];
  177. srcp1 += 2 * OPSIZ;
  178. srcp2 += 1 * OPSIZ;
  179. len -= 1;
  180. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  181. goto do0;
  182. /* Fall through. */
  183. }
  184. do {
  185. a0 = ((op_t *) srcp1)[0];
  186. b0 = ((op_t *) srcp2)[0];
  187. x = MERGE(a2, shl, a3, shr);
  188. if (x != b3)
  189. return CMP_LT_OR_GT (x, b3);
  190. do3:
  191. a1 = ((op_t *) srcp1)[1];
  192. b1 = ((op_t *) srcp2)[1];
  193. x = MERGE(a3, shl, a0, shr);
  194. if (x != b0)
  195. return CMP_LT_OR_GT (x, b0);
  196. do2:
  197. a2 = ((op_t *) srcp1)[2];
  198. b2 = ((op_t *) srcp2)[2];
  199. x = MERGE(a0, shl, a1, shr);
  200. if (x != b1)
  201. return CMP_LT_OR_GT (x, b1);
  202. do1:
  203. a3 = ((op_t *) srcp1)[3];
  204. b3 = ((op_t *) srcp2)[3];
  205. x = MERGE(a1, shl, a2, shr);
  206. if (x != b2)
  207. return CMP_LT_OR_GT (x, b2);
  208. srcp1 += 4 * OPSIZ;
  209. srcp2 += 4 * OPSIZ;
  210. len -= 4;
  211. } while (len != 0);
  212. /* This is the right position for do0. Please don't move
  213. it into the loop. */
  214. do0:
  215. x = MERGE(a2, shl, a3, shr);
  216. if (x != b3)
  217. return CMP_LT_OR_GT (x, b3);
  218. return 0;
  219. }
  220. int memcmp (const __ptr_t s1, const __ptr_t s2, size_t len)
  221. {
  222. op_t a0, b0, res;
  223. long int srcp1 = (long int) s1;
  224. long int srcp2 = (long int) s2;
  225. if (len >= OP_T_THRES) {
  226. /* There are at least some bytes to compare. No need to test
  227. for LEN == 0 in this alignment loop. */
  228. while (srcp2 % OPSIZ != 0) {
  229. a0 = ((byte *) srcp1)[0];
  230. b0 = ((byte *) srcp2)[0];
  231. srcp1 += 1;
  232. srcp2 += 1;
  233. res = a0 - b0;
  234. if (res != 0)
  235. return res;
  236. len -= 1;
  237. }
  238. /* SRCP2 is now aligned for memory operations on `op_t'.
  239. SRCP1 alignment determines if we can do a simple,
  240. aligned compare or need to shuffle bits. */
  241. res = (srcp1 % OPSIZ == 0) ?
  242. memcmp_common_alignment (srcp1, srcp2, len / OPSIZ) :
  243. memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
  244. if (res != 0)
  245. return res;
  246. /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
  247. srcp1 += len & -OPSIZ;
  248. srcp2 += len & -OPSIZ;
  249. len %= OPSIZ;
  250. }
  251. /* There are just a few bytes to compare. Use byte memory operations. */
  252. while (len != 0) {
  253. a0 = ((byte *) srcp1)[0];
  254. b0 = ((byte *) srcp2)[0];
  255. srcp1 += 1;
  256. srcp2 += 1;
  257. res = a0 - b0;
  258. if (res != 0)
  259. return res;
  260. len -= 1;
  261. }
  262. return 0;
  263. }