SortingNetwork.tcc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #ifndef __SORTINGNETWORK_TCC__
  2. #define __SORTINGNETWORK_TCC__
  3. // Merge operation for odd-even mergesort. Takes number of spaces to "skip" between items in buffer.
  4. // Left and right parts must be sorted, with left size a power of two and right size smaller.
  5. // Merges them to return a sorted result.
  6. template<OSwap_Style oswap_style>
  7. void OddEvenMerge(unsigned char *buf, uint64_t skip, uint64_t N, size_t block_size) {
  8. unsigned char *block1;
  9. unsigned char *block2;
  10. FOAV_SAFE_CNTXT(OddEvenMerge, N)
  11. if (N < 2) {
  12. return;
  13. }
  14. FOAV_SAFE_CNTXT(OddEvenMerge, N)
  15. if (N == 2) {
  16. block1 = buf;
  17. block2 = buf + block_size + (block_size*skip);
  18. oswap_buffer<oswap_style>(block1, block2, block_size,
  19. ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2)));
  20. return;
  21. }
  22. // Merge odd items
  23. OddEvenMerge<oswap_style>(buf, 2*skip+1, (N/2)+(N%2), block_size);
  24. // Merge even items
  25. OddEvenMerge<oswap_style>(buf+block_size+(block_size*skip), 2*skip+1, N/2, block_size);
  26. // Compare-and-swap subsequent item pairs, skipping first item
  27. block2 = buf;
  28. FOAV_SAFE_CNTXT(OddEvenMerge, N)
  29. for (int i=0; i<(N-1)/2; i++) {
  30. FOAV_SAFE_CNTXT(OddEvenMerge, i)
  31. block1 = block2 + block_size + (block_size*skip);
  32. block2 = block1 + block_size + (block_size*skip);
  33. oswap_buffer<oswap_style>(block1, block2, block_size,
  34. ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2)));
  35. }
  36. }
  37. template<OSwap_Style oswap_style>
  38. void OddEvenMergeSort(unsigned char *buf, uint64_t N, size_t block_size) {
  39. // Perform compare-and-swaps
  40. unsigned char *block1 = buf;
  41. unsigned char *block2 = buf + block_size;
  42. FOAV_SAFE_CNTXT(OddEvenMerge, N)
  43. if (N < 2) {
  44. return;
  45. }
  46. FOAV_SAFE_CNTXT(OddEvenMerge, N)
  47. if (N == 2) {
  48. bool swap_flag = ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2));
  49. oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);
  50. return;
  51. }
  52. // Divide into maximum power of two and remainder
  53. uint64_t N1 = pow2_lt(N);
  54. uint64_t N2 = N - N1;
  55. // Recursively sort left and right parts
  56. OddEvenMergeSort<oswap_style>(buf, N1, block_size);
  57. OddEvenMergeSort<oswap_style>(buf + block_size*N1, N2, block_size);
  58. // Apply merge operation to complete sort
  59. OddEvenMerge<oswap_style>(buf, 0, N, block_size);
  60. }
  61. template<OSwap_Style oswap_style, typename KeyType>
  62. void BitonicMerge(unsigned char *buffer, size_t N, size_t block_size, bool ascend) {
  63. FOAV_SAFE2_CNTXT(BitonicMerge, N, block_size)
  64. if(N<2){
  65. return;
  66. }
  67. else if((N & (N * -1))!=N) {
  68. size_t M = pow2_lt(N);
  69. unsigned char *block1 = buffer;
  70. unsigned char *block2 = buffer + (M * block_size);
  71. size_t feasible_swaps = N - M;
  72. FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, M)
  73. for(size_t i=0; i<feasible_swaps; i++) {
  74. FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, i)
  75. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  76. FOAV_SAFE_CNTXT(BitonicMerge, ascend)
  77. if(ascend){
  78. oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);
  79. } else {
  80. oswap_buffer<oswap_style>(block1, block2, block_size, !swap_flag);
  81. }
  82. block1+=block_size;
  83. block2+=block_size;
  84. FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, i)
  85. }
  86. BitonicMerge<oswap_style, KeyType>(buffer, M, block_size, ascend);
  87. BitonicMerge<oswap_style, KeyType>(buffer + (M * block_size), N-M, block_size, ascend);
  88. }
  89. else{ //Power of 2 case
  90. size_t split = N/2;
  91. unsigned char *block1 = buffer;
  92. unsigned char *block2 = buffer + (split * block_size);
  93. FOAV_SAFE_CNTXT(BitonicSort, split)
  94. for(size_t i=0; i<split; i++) {
  95. FOAV_SAFE_CNTXT(BitonicSort, i)
  96. FOAV_SAFE_CNTXT(BitonicSort, split)
  97. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  98. FOAV_SAFE_CNTXT(BitonicSort, ascend)
  99. if(ascend){
  100. oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);
  101. //ogt_comp_swap((uint64_t *) block1, (uint64_t *) block2, block1, block2, block_size);
  102. } else {
  103. oswap_buffer<oswap_style>(block1, block2, block_size, !swap_flag);
  104. //ogt_comp_swap((uint64_t *) block2, (uint64_t *) block1, block2, block1, block_size);
  105. }
  106. block1+=block_size;
  107. block2+=block_size;
  108. }
  109. BitonicMerge<oswap_style, KeyType>(buffer, split, block_size, ascend);
  110. BitonicMerge<oswap_style, KeyType>(buffer + (split * block_size), split, block_size, ascend);
  111. }
  112. }
  113. template<OSwap_Style oswap_style, typename KeyType = uint64_t>
  114. void BitonicSort(unsigned char *buffer, size_t N, size_t block_size, bool ascend) {
  115. FOAV_SAFE_CNTXT(BitonicSort, N)
  116. if(N < 2){
  117. return;
  118. }
  119. else { // Handle non-power of 2 case:
  120. size_t N1 = N/2;
  121. BitonicSort<oswap_style, KeyType>(buffer, N1, block_size, !ascend);
  122. BitonicSort<oswap_style, KeyType>(buffer + (block_size * N1), N-N1, block_size, ascend);
  123. BitonicMerge<oswap_style, KeyType>(buffer, N, block_size, ascend);
  124. }
  125. }
  126. template<OSwap_Style oswap_style, typename KeyType>
  127. void BitonicMerge(unsigned char *keys, size_t N, unsigned char *associated_data1,
  128. unsigned char *associated_data2, size_t data_size, bool ascend) {
  129. if(associated_data1==NULL) {
  130. if(N<2){
  131. return;
  132. }
  133. else if((N & (N * -1))!=N) {
  134. size_t M = pow2_lt(N);
  135. unsigned char *block1 = keys;
  136. unsigned char *block2 = keys + (M * sizeof(KeyType));
  137. size_t feasible_swaps = N - M;
  138. for(size_t i=0; i<feasible_swaps; i++) {
  139. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  140. if(ascend){
  141. oswap_buffer<oswap_style>(block1, block2, data_size, swap_flag);
  142. } else {
  143. oswap_buffer<oswap_style>(block1, block2, data_size, !swap_flag);
  144. }
  145. block1+=data_size;
  146. block2+=data_size;
  147. }
  148. BitonicMerge<oswap_style, KeyType>(keys, M, associated_data1, associated_data2, data_size, ascend);
  149. BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1, associated_data2, data_size, ascend);
  150. }
  151. else{ //Power of 2 case
  152. size_t split = N/2;
  153. unsigned char *block1 = keys;
  154. unsigned char *block2 = keys + (split * sizeof(KeyType));
  155. for(size_t i=0; i<split; i++) {
  156. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  157. if(ascend){
  158. oswap_buffer<oswap_style>(block1, block2, data_size, swap_flag);
  159. //ogt_comp_swap((uint64_t *) block1, (uint64_t *) block2, block1, block2, block_size);
  160. } else {
  161. oswap_buffer<oswap_style>(block1, block2, data_size, !swap_flag);
  162. //ogt_comp_swap((uint64_t *) block2, (uint64_t *) block1, block2, block1, block_size);
  163. }
  164. block1+=data_size;
  165. block2+=data_size;
  166. }
  167. BitonicMerge<oswap_style, KeyType>(keys, split, data_size, ascend);
  168. BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), split, data_size, ascend);
  169. }
  170. } else{
  171. if(N<2){
  172. return;
  173. }
  174. else if((N & (N * -1))!=N) {
  175. size_t M = pow2_lt(N);
  176. unsigned char *block1 = keys;
  177. unsigned char *block2 = keys + (M * sizeof(KeyType));
  178. size_t feasible_swaps = N - M;
  179. unsigned char *adata1_l = associated_data1;
  180. unsigned char *adata1_r = associated_data1 + (M * data_size);
  181. unsigned char *adata2_l = associated_data2;
  182. unsigned char *adata2_r = associated_data2;
  183. if(associated_data2!=NULL) {
  184. adata2_r = associated_data2 + (M * data_size);
  185. }
  186. for(size_t i=0; i<feasible_swaps; i++) {
  187. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  188. if(ascend){
  189. oswap_key<KeyType>(block1, block2, swap_flag);
  190. oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, swap_flag);
  191. if(associated_data2!=NULL){
  192. oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, swap_flag);
  193. }
  194. } else {
  195. oswap_key<KeyType>(block1, block2, !swap_flag);
  196. oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, !swap_flag);
  197. if(associated_data2!=NULL){
  198. oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, !swap_flag);
  199. }
  200. }
  201. block1+=sizeof(KeyType);
  202. block2+=sizeof(KeyType);
  203. adata1_l+=data_size;
  204. adata1_r+=data_size;
  205. if(associated_data2!=NULL){
  206. adata2_l+=data_size;
  207. adata2_r+=data_size;
  208. }
  209. }
  210. BitonicMerge<oswap_style, KeyType>(keys, M, associated_data1, associated_data2, data_size, ascend);
  211. if(associated_data2==NULL)
  212. BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1 + (M*data_size), associated_data2, data_size, ascend);
  213. else
  214. BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1 + (M*data_size), associated_data2 + (M*data_size), data_size, ascend);
  215. }
  216. else{ //Power of 2 case
  217. size_t split = N/2;
  218. unsigned char *block1 = keys;
  219. unsigned char *block2 = keys + (split * sizeof(KeyType));
  220. unsigned char *adata1_l = associated_data1;
  221. unsigned char *adata1_r = associated_data1 + (split * data_size);
  222. unsigned char *adata2_l = associated_data2;
  223. unsigned char *adata2_r = associated_data2;
  224. if(associated_data2!=NULL) {
  225. adata2_r = associated_data2 + (split * data_size);
  226. }
  227. for(size_t i=0; i<split; i++) {
  228. uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);
  229. if(ascend){
  230. oswap_key<KeyType>(block1, block2, swap_flag);
  231. oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, swap_flag);
  232. if(associated_data2!=NULL){
  233. oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, swap_flag);
  234. }
  235. } else {
  236. oswap_key<KeyType>(block1, block2, !swap_flag);
  237. oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, !swap_flag);
  238. if(associated_data2!=NULL){
  239. oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, !swap_flag);
  240. }
  241. }
  242. block1+=sizeof(KeyType);
  243. block2+=sizeof(KeyType);
  244. adata1_l+=data_size;
  245. adata1_r+=data_size;
  246. if(associated_data2!=NULL){
  247. adata2_l+=data_size;
  248. adata2_r+=data_size;
  249. }
  250. }
  251. BitonicMerge<oswap_style, KeyType>(keys, split, associated_data1, associated_data2, data_size, ascend);
  252. if(associated_data2==NULL)
  253. BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), N-split, associated_data1 + (split*data_size), associated_data2, data_size, ascend);
  254. else
  255. BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), N-split, associated_data1 + (split*data_size), associated_data2 + (split*data_size), data_size, ascend);
  256. }
  257. }
  258. }
  259. template<OSwap_Style oswap_style, typename KeyType = uint64_t>
  260. void BitonicSort(unsigned char *keys, size_t N, unsigned char *associated_data1,
  261. unsigned char *associated_data2, size_t data_size, bool ascend) {
  262. FOAV_SAFE_CNTXT(BitonicSort, N)
  263. if(N < 2){
  264. return;
  265. }
  266. else {
  267. size_t N1 = N/2;
  268. FOAV_SAFE_CNTXT(BitonicSort, associated_data1)
  269. FOAV_SAFE_CNTXT(BitonicSort, associated_data2)
  270. if(associated_data1==NULL){
  271. BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,
  272. data_size, !ascend);
  273. // Increment keys by N1 data_size blocks, since keys holds the entire buffer to sort.
  274. BitonicSort<oswap_style, KeyType>(keys + (N1*data_size), N-N1, associated_data1,
  275. associated_data2, data_size, ascend);
  276. BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,
  277. data_size, ascend);
  278. } else if(associated_data2==NULL){
  279. //There is only one associated_data list.
  280. BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,
  281. data_size, !ascend);
  282. BitonicSort<oswap_style, KeyType>(keys + (N1*sizeof(KeyType)), N-N1, associated_data1 + (N1*data_size),
  283. associated_data2, data_size, ascend);
  284. BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,
  285. data_size, ascend);
  286. FOAV_SAFE_CNTXT(BitonicSort, associated_data1)
  287. FOAV_SAFE_CNTXT(BitonicSort, associated_data2)
  288. } else {
  289. //Both associated_data lists.
  290. BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,
  291. data_size, !ascend);
  292. BitonicSort<oswap_style, KeyType>(keys + (N1*sizeof(KeyType)), N-N1, associated_data1 + (N1*data_size),
  293. associated_data2 + (N1*data_size), data_size, ascend);
  294. BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,
  295. data_size, ascend);
  296. }
  297. }
  298. }
  299. #endif