memcmp.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. /* Copyright (C) 1991,1993,1995,1997,1998,2003,2004
  2. Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. Contributed by Torbjorn Granlund (tege@sics.se).
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. #include "api.h"
  18. #undef __ptr_t
  19. #if defined __cplusplus || (defined __STDC__ && __STDC__)
  20. #define __ptr_t void*
  21. #else /* Not C++ or ANSI C. */
  22. #undef const
  23. #define const
  24. #define __ptr_t char*
  25. #endif /* C++ or ANSI C. */
  26. #include <host_endian.h>
  27. #include <sysdeps/generic/memcopy.h>
  28. #if __BYTE_ORDER == __BIG_ENDIAN
  29. #define WORDS_BIGENDIAN
  30. #endif
  31. #ifdef WORDS_BIGENDIAN
  32. #define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
  33. #else
  34. #define CMP_LT_OR_GT(a, b) memcmp_bytes((a), (b))
  35. #endif
  36. /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
  37. /* The strategy of this memcmp is:
  38. 1. Compare bytes until one of the block pointers is aligned.
  39. 2. Compare using memcmp_common_alignment or
  40. memcmp_not_common_alignment, regarding the alignment of the other
  41. block after the initial byte operations. The maximum number of
  42. full words (of type op_t) are compared in this way.
  43. 3. Compare the few remaining bytes. */
  44. #ifndef WORDS_BIGENDIAN
  45. /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
  46. A and B are known to be different.
  47. This is needed only on little-endian machines. */
  48. static int memcmp_bytes(op_t a, op_t b) {
  49. long int srcp1 = (long int)&a;
  50. long int srcp2 = (long int)&b;
  51. op_t a0, b0;
  52. do {
  53. a0 = ((byte*)srcp1)[0];
  54. b0 = ((byte*)srcp2)[0];
  55. srcp1 += 1;
  56. srcp2 += 1;
  57. } while (a0 == b0);
  58. return a0 - b0;
  59. }
  60. #endif
  61. /* (memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
  62. objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
  63. memory operations on `op_t's. */
  64. static int memcmp_common_alignment(long srcp1, long srcp2, int len) {
  65. op_t a0, a1;
  66. op_t b0, b1;
  67. switch (len % 4) {
  68. default: /* Avoid warning about uninitialized local variables. */
  69. case 2:
  70. a0 = ((op_t*)srcp1)[0];
  71. b0 = ((op_t*)srcp2)[0];
  72. srcp1 -= 2 * OPSIZ;
  73. srcp2 -= 2 * OPSIZ;
  74. len += 2;
  75. goto do1;
  76. case 3:
  77. a1 = ((op_t*)srcp1)[0];
  78. b1 = ((op_t*)srcp2)[0];
  79. srcp1 -= OPSIZ;
  80. srcp2 -= OPSIZ;
  81. len += 1;
  82. goto do2;
  83. case 0:
  84. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  85. return 0;
  86. a0 = ((op_t*)srcp1)[0];
  87. b0 = ((op_t*)srcp2)[0];
  88. goto do3;
  89. case 1:
  90. a1 = ((op_t*)srcp1)[0];
  91. b1 = ((op_t*)srcp2)[0];
  92. srcp1 += OPSIZ;
  93. srcp2 += OPSIZ;
  94. len -= 1;
  95. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  96. goto do0;
  97. /* Fall through. */
  98. }
  99. do {
  100. a0 = ((op_t*)srcp1)[0];
  101. b0 = ((op_t*)srcp2)[0];
  102. if (a1 != b1)
  103. return CMP_LT_OR_GT(a1, b1);
  104. do3:
  105. a1 = ((op_t*)srcp1)[1];
  106. b1 = ((op_t*)srcp2)[1];
  107. if (a0 != b0)
  108. return CMP_LT_OR_GT(a0, b0);
  109. do2:
  110. a0 = ((op_t*)srcp1)[2];
  111. b0 = ((op_t*)srcp2)[2];
  112. if (a1 != b1)
  113. return CMP_LT_OR_GT(a1, b1);
  114. do1:
  115. a1 = ((op_t*)srcp1)[3];
  116. b1 = ((op_t*)srcp2)[3];
  117. if (a0 != b0)
  118. return CMP_LT_OR_GT(a0, b0);
  119. srcp1 += 4 * OPSIZ;
  120. srcp2 += 4 * OPSIZ;
  121. len -= 4;
  122. } while (len != 0);
  123. /* This is the right position for do0. Please don't move
  124. it into the loop. */
  125. do0:
  126. if (a1 != b1)
  127. return CMP_LT_OR_GT(a1, b1);
  128. return 0;
  129. }
  130. /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
  131. `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
  132. operations on `op_t', but SRCP1 *should be unaligned*. */
  133. static int memcmp_not_common_alignment(long srcp1, long srcp2, int len) {
  134. op_t a0, a1, a2, a3;
  135. op_t b0, b1, b2, b3;
  136. op_t x;
  137. int shl, shr;
  138. /* Calculate how to shift a word read at the memory operation
  139. aligned srcp1 to make it aligned for comparison. */
  140. shl = 8 * (srcp1 % OPSIZ);
  141. shr = 8 * OPSIZ - shl;
  142. /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
  143. it points in the middle of. */
  144. srcp1 &= -OPSIZ;
  145. switch (len % 4) {
  146. default: /* Avoid warning about uninitialized local variables. */
  147. case 2:
  148. a1 = ((op_t*)srcp1)[0];
  149. a2 = ((op_t*)srcp1)[1];
  150. b2 = ((op_t*)srcp2)[0];
  151. srcp1 -= 1 * OPSIZ;
  152. srcp2 -= 2 * OPSIZ;
  153. len += 2;
  154. goto do1;
  155. case 3:
  156. a0 = ((op_t*)srcp1)[0];
  157. a1 = ((op_t*)srcp1)[1];
  158. b1 = ((op_t*)srcp2)[0];
  159. srcp2 -= 1 * OPSIZ;
  160. len += 1;
  161. goto do2;
  162. case 0:
  163. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  164. return 0;
  165. a3 = ((op_t*)srcp1)[0];
  166. a0 = ((op_t*)srcp1)[1];
  167. b0 = ((op_t*)srcp2)[0];
  168. srcp1 += 1 * OPSIZ;
  169. goto do3;
  170. case 1:
  171. a2 = ((op_t*)srcp1)[0];
  172. a3 = ((op_t*)srcp1)[1];
  173. b3 = ((op_t*)srcp2)[0];
  174. srcp1 += 2 * OPSIZ;
  175. srcp2 += 1 * OPSIZ;
  176. len -= 1;
  177. if (OP_T_THRES <= 3 * OPSIZ && len == 0)
  178. goto do0;
  179. /* Fall through. */
  180. }
  181. do {
  182. a0 = ((op_t*)srcp1)[0];
  183. b0 = ((op_t*)srcp2)[0];
  184. x = MERGE(a2, shl, a3, shr);
  185. if (x != b3)
  186. return CMP_LT_OR_GT(x, b3);
  187. do3:
  188. a1 = ((op_t*)srcp1)[1];
  189. b1 = ((op_t*)srcp2)[1];
  190. x = MERGE(a3, shl, a0, shr);
  191. if (x != b0)
  192. return CMP_LT_OR_GT(x, b0);
  193. do2:
  194. a2 = ((op_t*)srcp1)[2];
  195. b2 = ((op_t*)srcp2)[2];
  196. x = MERGE(a0, shl, a1, shr);
  197. if (x != b1)
  198. return CMP_LT_OR_GT(x, b1);
  199. do1:
  200. a3 = ((op_t*)srcp1)[3];
  201. b3 = ((op_t*)srcp2)[3];
  202. x = MERGE(a1, shl, a2, shr);
  203. if (x != b2)
  204. return CMP_LT_OR_GT(x, b2);
  205. srcp1 += 4 * OPSIZ;
  206. srcp2 += 4 * OPSIZ;
  207. len -= 4;
  208. } while (len != 0);
  209. /* This is the right position for do0. Please don't move
  210. it into the loop. */
  211. do0:
  212. x = MERGE(a2, shl, a3, shr);
  213. if (x != b3)
  214. return CMP_LT_OR_GT(x, b3);
  215. return 0;
  216. }
  217. int memcmp(const __ptr_t s1, const __ptr_t s2, size_t len) {
  218. op_t a0, b0, res;
  219. long int srcp1 = (long int)s1;
  220. long int srcp2 = (long int)s2;
  221. if (len >= OP_T_THRES) {
  222. /* There are at least some bytes to compare. No need to test
  223. for LEN == 0 in this alignment loop. */
  224. while (srcp2 % OPSIZ != 0) {
  225. a0 = ((byte*)srcp1)[0];
  226. b0 = ((byte*)srcp2)[0];
  227. srcp1 += 1;
  228. srcp2 += 1;
  229. res = a0 - b0;
  230. if (res != 0)
  231. return res;
  232. len -= 1;
  233. }
  234. /* SRCP2 is now aligned for memory operations on `op_t'.
  235. SRCP1 alignment determines if we can do a simple,
  236. aligned compare or need to shuffle bits. */
  237. res = (srcp1 % OPSIZ == 0) ? memcmp_common_alignment(srcp1, srcp2, len / OPSIZ)
  238. : memcmp_not_common_alignment(srcp1, srcp2, len / OPSIZ);
  239. if (res != 0)
  240. return res;
  241. /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
  242. srcp1 += len & -OPSIZ;
  243. srcp2 += len & -OPSIZ;
  244. len %= OPSIZ;
  245. }
  246. /* There are just a few bytes to compare. Use byte memory operations. */
  247. while (len != 0) {
  248. a0 = ((byte*)srcp1)[0];
  249. b0 = ((byte*)srcp2)[0];
  250. srcp1 += 1;
  251. srcp2 += 1;
  252. res = a0 - b0;
  253. if (res != 0)
  254. return res;
  255. len -= 1;
  256. }
  257. return 0;
  258. }