TightCompaction_v2.tcc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. #ifndef __NOP_TIGHTCOMPACTION_V2_TCC__
  2. #define __NOP_TIGHTCOMPACTION_V2_TCC__
  3. #include "pthread.h"
  4. /*
  5. TightCompaction (Order Preserving Tight Compaction):
  6. Order Preserving TightCompaction can take an input array of blocks of
  7. block_size bytes each, and an array of "marked" elements with ones at the
  8. indices corresponding to the blocks that need to be compacted.
  9. It returns the input array TightCompact-ed, i.e. all the real blocks are
  10. moved to the start of the array, or compacted to an input index
  11. (with wraparound)
  12. This is the Oblivious Recusrive Compaction (ORCompact) algorithm from
  13. our CCS 2022 paper "Fast Fully Oblivious Compaction and Shuffling".
  14. */
  15. template <OSwap_Style oswap_style>
  16. void TightCompact_2power(unsigned char *buf, size_t N, size_t block_size, size_t offset, bool *selected) {
  17. // Compute counts of selected items at power-of-two intervals
  18. FOAV_SAFE2_CNTXT(TC_2power_summing_selected_count, N, block_size)
  19. uint32_t *selected_count = NULL;
  20. FOAV_SAFE_CNTXT(TC_2power, TC_PRECOMPUTE_COUNTS)
  21. if (TC_PRECOMPUTE_COUNTS) {
  22. // Allocate array to hold counts
  23. selected_count = new uint32_t[N+1];
  24. selected_count[0] = 0;
  25. // Compute cumulative counts
  26. for (size_t i=0; i<N; i++){
  27. FOAV_SAFE2_CNTXT(TC_2power_summing_selected_count, i, N)
  28. selected_count[i+1] = selected[i] + selected_count[i];
  29. }
  30. TightCompact_2power_inner<oswap_style>(buf, N, block_size, offset, selected, selected_count);
  31. delete[] selected_count;
  32. } else {
  33. TightCompact_2power_inner<oswap_style>(buf, N, block_size, offset, selected, selected_count);
  34. }
  35. }
  36. template <OSwap_Style oswap_style>
  37. void TightCompact_2power_inner(unsigned char *buf, size_t N, size_t block_size, size_t offset, bool *selected, uint32_t *selected_count) {
  38. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  39. if (N==1) {
  40. return;
  41. }
  42. if (N==2) {
  43. bool swap = (!selected[0] & selected[1]) ^ offset;
  44. oswap_buffer<oswap_style>(buf, buf+block_size, block_size, swap);
  45. return;
  46. }
  47. // Number of selected items in left half
  48. size_t m1;
  49. FOAV_SAFE_CNTXT(TC_2power, TC_PRECOMPUTE_COUNTS)
  50. if (TC_PRECOMPUTE_COUNTS) {
  51. m1 = selected_count[N/2] - selected_count[0];
  52. } else {
  53. m1=0;
  54. FOAV_SAFE_CNTXT(TC_2power, N)
  55. for(size_t i=0; i<N/2; i++){
  56. FOAV_SAFE_CNTXT(TC_2power, i)
  57. m1+=selected[i];
  58. }
  59. }
  60. size_t offset_mod = (offset & ((N/2)-1));
  61. size_t offset_m1_mod = (offset+m1) & ((N/2)-1);
  62. bool offset_right = (offset >= N/2);
  63. bool left_wrapped = ((offset_mod + m1) >= (N/2));
  64. TightCompact_2power_inner<oswap_style>(buf, N/2, block_size, offset_mod, selected, selected_count);
  65. TightCompact_2power_inner<oswap_style>(buf + ((N/2)*block_size), N/2, block_size, offset_m1_mod, (selected + (N/2)), selected_count + N/2);
  66. unsigned char *buf1_ptr = buf, *buf2_ptr = (buf + (N/2)*block_size);
  67. FOAV_SAFE_CNTXT(TC_2power_inner, TC_OPT_SWAP_FLAG)
  68. if (TC_OPT_SWAP_FLAG) {
  69. bool swap_flag = left_wrapped ^ offset_right;
  70. size_t num_swap = N/2;
  71. FOAV_SAFE2_CNTXT(TC_2power_inner, num_swap, block_size)
  72. for(size_t i=0; i<num_swap; i++){
  73. FOAV_SAFE2_CNTXT(TC_2power_inner_N/2_swaps, i, num_swap)
  74. swap_flag = swap_flag ^ (i == offset_m1_mod);
  75. oswap_buffer<oswap_style>(buf1_ptr, buf2_ptr, block_size, swap_flag);
  76. buf1_ptr+=block_size;
  77. buf2_ptr+=block_size;
  78. FOAV_SAFE2_CNTXT(TC_2power_inner, num_swap, block_size)
  79. }
  80. } else {
  81. FOAV_SAFE_CNTXT(TC_2power_inner, N)
  82. for(size_t i=0; i<N/2; i++){
  83. FOAV_SAFE_CNTXT(TC_2power_inner, i)
  84. bool swap_flag = (i>=offset_m1_mod) ^ left_wrapped ^ offset_right;
  85. oswap_buffer<oswap_style>(buf1_ptr, buf2_ptr, block_size, swap_flag);
  86. buf1_ptr+=block_size;
  87. buf2_ptr+=block_size;
  88. FOAV_SAFE2_CNTXT(TC_2power_inner, i, N)
  89. }
  90. }
  91. }
  92. struct TightCompact_2power_inner_parallel_args {
  93. unsigned char *buf;
  94. size_t N, block_size, offset;
  95. bool *selected;
  96. uint32_t *selected_count;
  97. size_t nthreads;
  98. };
  99. template <OSwap_Style oswap_style>
  100. static void* TightCompact_2power_inner_parallel_launch(void *voidargs) {
  101. struct TightCompact_2power_inner_parallel_args *args =
  102. (TightCompact_2power_inner_parallel_args *)voidargs;
  103. TightCompact_2power_inner_parallel<oswap_style>(args->buf, args->N,
  104. args->block_size, args->offset, args->selected, args->selected_count,
  105. args->nthreads);
  106. return NULL;
  107. }
  108. struct oswap_range_args {
  109. size_t block_size;
  110. size_t swap_start, swap_end;
  111. size_t offset_m1_mod;
  112. unsigned char *buf1, *buf2;
  113. bool swap_flag;
  114. };
  115. template <OSwap_Style oswap_style>
  116. static void* oswap_range(void *voidargs) {
  117. struct oswap_range_args *args = (oswap_range_args*)voidargs;
  118. size_t block_size = args->block_size;
  119. size_t swap_start = args->swap_start;
  120. size_t swap_end = args->swap_end;
  121. size_t offset_m1_mod = args->offset_m1_mod;
  122. unsigned char *buf1 = args->buf1 + swap_start*block_size;
  123. unsigned char *buf2 = args->buf2 + swap_start*block_size;
  124. bool swap_flag = args->swap_flag;
  125. FOAV_SAFE2_CNTXT(oswap_range, block_size, swap_start)
  126. FOAV_SAFE_CNTXT(oswap_range, swap_end)
  127. //FOAV_SAFE_CNTXT(oswap_range, &OSWAP_COUNTER)
  128. //printf("start oswap range %p %lu %lu\n", buf1, swap_start, swap_end);
  129. for(size_t i=swap_start; i<swap_end; i++){
  130. FOAV_SAFE2_CNTXT(oswap_range, i, swap_end)
  131. oswap_buffer<oswap_style>(buf1, buf2, block_size, swap_flag ^ (i >= offset_m1_mod));
  132. buf1+=block_size;
  133. buf2+=block_size;
  134. //FOAV_SAFE_CNTXT(oswap_range, &OSWAP_COUNTER)
  135. FOAV_SAFE2_CNTXT(oswap_range, swap_end, block_size)
  136. FOAV_SAFE_CNTXT(oswap_range, i)
  137. }
  138. //printf("end oswap range %p %lu %lu\n", buf1, swap_start, swap_end);
  139. return NULL;
  140. }
  141. template <OSwap_Style oswap_style>
  142. void TightCompact_2power_inner_parallel(unsigned char *buf, size_t N, size_t block_size, size_t offset, bool *selected, uint32_t *selected_count, size_t nthreads) {
  143. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, g_thread_id)
  144. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  145. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, nthreads)
  146. if (nthreads <= 1) {
  147. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  148. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, nthreads)
  149. unsigned long start = printf_with_rtclock("Thread %u starting TightCompact_2power_inner(buf=%p, N=%lu, offset=%lu, nthreads=%lu)\n", g_thread_id, buf, N, offset, nthreads);
  150. TightCompact_2power_inner<oswap_style>(buf, N, block_size, offset, selected, selected_count);
  151. printf_with_rtclock_diff(start, "Thread %u ending TightCompact_2power_inner(buf=%p, N=%lu, offset=%lu, nthreads=%lu)\n", g_thread_id, buf, N, offset, nthreads);
  152. return;
  153. }
  154. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, N)
  155. if (N==1) {
  156. return;
  157. }
  158. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, N)
  159. if (N==2) {
  160. bool swap = (!selected[0] & selected[1]) ^ offset;
  161. oswap_buffer<oswap_style>(buf, buf+block_size, block_size, swap);
  162. return;
  163. }
  164. unsigned long start = printf_with_rtclock("Thread %u starting TightCompact_2power_inner_parallel(buf=%p, N=%lu, offset=%lu, nthreads=%lu)\n", g_thread_id, buf, N, offset, nthreads);
  165. // Number of selected items in left half
  166. size_t m1;
  167. m1 = selected_count[N/2] - selected_count[0];
  168. size_t offset_mod = (offset & ((N/2)-1));
  169. size_t offset_m1_mod = (offset+m1) & ((N/2)-1);
  170. bool offset_right = (offset >= N/2);
  171. bool left_wrapped = ((offset_mod + m1) >= (N/2));
  172. size_t lthreads = nthreads/2;
  173. size_t rthreads = nthreads - lthreads;
  174. threadid_t rightthreadid = g_thread_id + lthreads;
  175. /* Dispatch the right half to thread g_thread_id + lthreads; it will inherit threads
  176. g_thread_id + lthreads .. g_thread_id + nthreads-1. */
  177. struct TightCompact_2power_inner_parallel_args rightargs = {
  178. buf+ ((N/2)*block_size), N/2, block_size, offset_m1_mod, selected + N/2, selected_count + N/2, rthreads
  179. };
  180. threadpool_dispatch(rightthreadid,
  181. TightCompact_2power_inner_parallel_launch<oswap_style>,
  182. &rightargs);
  183. /* Do the left half ourselves (threads g_thread_id .. g_thread_id + lthreads-1) */
  184. TightCompact_2power_inner_parallel<oswap_style>(buf, N/2, block_size, offset_mod, selected, selected_count, lthreads);
  185. threadpool_join(rightthreadid, NULL);
  186. unsigned char *buf1_ptr = buf, *buf2_ptr = (buf + (N/2)*block_size);
  187. bool swap_flag = left_wrapped ^ offset_right;
  188. size_t num_swap = N/2;
  189. FOAV_SAFE2_CNTXT(TC_2power_inner, num_swap, block_size)
  190. oswap_range_args args[nthreads];
  191. size_t inc = num_swap / nthreads;
  192. size_t extra = num_swap % nthreads;
  193. size_t last = 0;
  194. for (size_t i=0; i<nthreads; ++i) {
  195. size_t next = last + inc + (i < extra);
  196. args[i] = { block_size, last, next, offset_m1_mod, buf1_ptr, buf2_ptr, swap_flag };
  197. last = next;
  198. }
  199. for (size_t i=0; i<nthreads-1; ++i) {
  200. threadpool_dispatch(g_thread_id+1+i, oswap_range<oswap_style>, args+i);
  201. }
  202. // Do the last section ourselves
  203. oswap_range<oswap_style>((void*)(args+nthreads-1));
  204. for (size_t i=0; i<nthreads-1; ++i) {
  205. FOAV_SAFE2_CNTXT(TC_2power_inner_parallel, i, nthreads)
  206. threadpool_join(g_thread_id+1+i, NULL);
  207. }
  208. printf_with_rtclock_diff(start, "Thread %u ending TightCompact_2power_inner_parallel(buf=%p, N=%lu, offset=%lu, nthreads=%lu)\n", g_thread_id, buf, N, offset, nthreads);
  209. }
  210. /*
  211. NOTE: TightCompact can only be invoked with offset 0.
  212. To invoke with a non-0 offset, use TightCompact_2power, with N = 2^x.
  213. */
  214. template <OSwap_Style oswap_style>
  215. void TightCompact(unsigned char *buf, size_t N, size_t block_size,
  216. bool *selected) {
  217. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  218. uint32_t *selected_count = NULL;
  219. if (TC_PRECOMPUTE_COUNTS) {
  220. // Allocate array to hold counts
  221. try {
  222. selected_count = new uint32_t[N+1];
  223. } catch (std::bad_alloc&){
  224. printf("Allocating memory failed in TC\n");
  225. }
  226. selected_count[0] = 0;
  227. // Compute cumulative counts
  228. for (size_t i=0; i<N; i++){
  229. selected_count[i+1] = selected[i] + selected_count[i];
  230. }
  231. TightCompact_inner<oswap_style>(buf, N, block_size, selected, selected_count);
  232. delete[] selected_count;
  233. } else {
  234. TightCompact_inner<oswap_style>(buf, N, block_size, selected, selected_count);
  235. }
  236. }
  237. template <OSwap_Style oswap_style>
  238. void TightCompact_parallel(unsigned char *buf, size_t N, size_t block_size,
  239. bool *selected, size_t nthreads) {
  240. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  241. uint32_t *selected_count = NULL;
  242. // Allocate array to hold counts
  243. try {
  244. selected_count = new uint32_t[N+1];
  245. } catch (std::bad_alloc&){
  246. printf("Allocating memory failed in TC\n");
  247. }
  248. selected_count[0] = 0;
  249. // Compute cumulative counts
  250. for (size_t i=0; i<N; i++){
  251. selected_count[i+1] = selected[i] + selected_count[i];
  252. }
  253. //printf("TightCompact_parallel(nthreads=%lu)\n", nthreads);
  254. TightCompact_inner_parallel<oswap_style>(buf, N, block_size, selected, selected_count, nthreads);
  255. delete[] selected_count;
  256. }
  257. template <OSwap_Style oswap_style>
  258. void TightCompact_inner(unsigned char *buf, size_t N, size_t block_size, bool *selected, uint32_t *selected_count){
  259. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  260. if(N==0){
  261. return;
  262. }
  263. else if(N==1){
  264. return;
  265. }
  266. else if(N==2){
  267. bool swap = (!selected[0] & selected[1]);
  268. oswap_buffer<oswap_style>(buf, buf+block_size, block_size, swap);
  269. return;
  270. }
  271. size_t gt_pow2;
  272. size_t split_index;
  273. // Find largest power of 2 < N
  274. gt_pow2 = pow2_lt(N);
  275. // For Order-preserving ORCompact
  276. // This will be right (R) of the recursion, and the leftover non-power of 2 left (L)
  277. split_index = N - gt_pow2;
  278. // Number of selected items in the non-power of 2 side (left)
  279. size_t mL;
  280. if (TC_PRECOMPUTE_COUNTS) {
  281. mL = selected_count[split_index] - selected_count[0];
  282. } else {
  283. mL = 0;
  284. for(size_t i=0; i<split_index; i++){
  285. mL+=selected[i];
  286. }
  287. }
  288. unsigned char *L_ptr = buf;
  289. unsigned char *R_ptr = buf + (split_index * block_size);
  290. //printf("Lsize = %ld, Rsize = %ld, Rside offset = %ld\n", split_index, gt_pow2, (gt_pow2 - split_index + mL));
  291. TightCompact_inner<oswap_style>(L_ptr, split_index, block_size, selected, selected_count);
  292. TightCompact_2power_inner<oswap_style>(R_ptr, gt_pow2, block_size, (gt_pow2 - split_index + mL) % gt_pow2, selected+split_index, selected_count+split_index);
  293. // For OP we CnS the first n_2 elements (split_size) against the suffix n_2 elements of the n_1 (2 power elements)
  294. R_ptr = buf + (gt_pow2 * block_size);
  295. // Perform N-split_index oblivious swaps for this level
  296. FOAV_SAFE_CNTXT(TC_inner_oswap_loop, split_index)
  297. for (size_t i=0; i<split_index; i++){
  298. FOAV_SAFE2_CNTXT(TC_inner_oswap_loop, i, split_index)
  299. // Oswap blocks at L_start, R_start conditional on marked_items
  300. bool swap_flag = i>=mL;
  301. oswap_buffer<oswap_style>(L_ptr, R_ptr, block_size, swap_flag);
  302. L_ptr+=block_size;
  303. R_ptr+=block_size;
  304. FOAV_SAFE2_CNTXT(TC_inner_oswap_loop, i, split_index)
  305. }
  306. }
  307. template <OSwap_Style oswap_style>
  308. void TightCompact_inner_parallel(unsigned char *buf, size_t N, size_t block_size, bool *selected, uint32_t *selected_count, size_t nthreads){
  309. FOAV_SAFE2_CNTXT(TC_inner_base_cases_of_recursion, N, block_size)
  310. FOAV_SAFE_CNTXT(TC_inner_base_cases_of_recursion, nthreads)
  311. if (nthreads <= 1 || N < 16) {
  312. unsigned long start = printf_with_rtclock("Thread %u starting TightCompact_inner(N=%lu)\n", g_thread_id, N);
  313. TightCompact_inner<oswap_style>(buf, N, block_size, selected, selected_count);
  314. printf_with_rtclock_diff(start, "Thread %u ending TightCompact_inner(N=%lu)\n", g_thread_id, N);
  315. return;
  316. }
  317. if(N==0){
  318. return;
  319. }
  320. else if(N==1){
  321. return;
  322. }
  323. else if(N==2){
  324. bool swap = (!selected[0] & selected[1]);
  325. oswap_buffer<oswap_style>(buf, buf+block_size, block_size, swap);
  326. return;
  327. }
  328. unsigned long start = printf_with_rtclock("Thread %u starting TightCompact_inner_parallel(N=%lu, nthreads=%lu)\n", g_thread_id, N, nthreads);
  329. size_t split_index, n1, n2;
  330. // Find largest power of 2 < N
  331. // This will be right (R) n1 of the recursion, and the leftover left (L) n2
  332. n1 = pow2_lt(N);
  333. n2 = N - n1;
  334. // Number of selected items in left
  335. size_t m2;
  336. m2 = selected_count[n2] - selected_count[0];
  337. unsigned char *L_ptr = buf;
  338. unsigned char *R_ptr = buf + (n2 * block_size);
  339. size_t lthreads = nthreads/2;
  340. size_t rthreads = nthreads - lthreads;
  341. struct TightCompact_2power_inner_parallel_args rightargs = {
  342. R_ptr, n1, block_size, n1 - n2 + m2, selected + n2, selected_count + n2,
  343. rthreads
  344. };
  345. threadpool_dispatch(g_thread_id+lthreads,
  346. TightCompact_2power_inner_parallel_launch<oswap_style>,
  347. &rightargs);
  348. TightCompact_inner_parallel<oswap_style>(L_ptr, n2, block_size, selected, selected_count, lthreads);
  349. threadpool_join(g_thread_id+lthreads, NULL);
  350. size_t num_swap = N-n1;
  351. FOAV_SAFE2_CNTXT(TC_inner_parallel, nthreads, num_swap)
  352. oswap_range_args args[nthreads];
  353. size_t inc = num_swap / nthreads;
  354. size_t extra = num_swap % nthreads;
  355. size_t last = 0;
  356. // We tweak R_ptr before we compare, to set compare the n2 prefix in L with the n2 suffix of R
  357. R_ptr = buf + (n1 * block_size);
  358. for (size_t i=0; i<nthreads; ++i) {
  359. size_t next = last + inc + (i < extra);
  360. args[i] = { block_size, last, next, m2, L_ptr, R_ptr, false };
  361. last = next;
  362. FOAV_SAFE2_CNTXT(TC_inner_parallel, i, nthreads)
  363. }
  364. for (size_t i=0; i<nthreads-1; ++i) {
  365. threadpool_dispatch(g_thread_id+1+i, oswap_range<oswap_style>, args+i);
  366. }
  367. // Do the last section ourselves
  368. oswap_range<oswap_style>((void*)(args+nthreads-1));
  369. FOAV_SAFE_CNTXT(TC_inner_parallel, nthreads)
  370. for (size_t i=0; i<nthreads-1; ++i) {
  371. threadpool_join(g_thread_id+1+i, NULL);
  372. FOAV_SAFE2_CNTXT(TC_inner_parallel, i, nthreads)
  373. }
  374. printf_with_rtclock_diff(start, "Thread %u ending TightCompact_inner_parallel(N=%lu, nthreads=%lu)\n", g_thread_id, N, nthreads);
  375. }
  376. #ifndef BEFTS_MODE
  377. // Perform the oswaps for input level over the OP_Tight Compaction Network
  378. template <OSwap_Style oswap_style>
  379. void process_TCN(uint64_t N, uint8_t level, unsigned char *bfr_ptr, size_t block_size,
  380. uint64_t *LS_distance) {
  381. FOAV_SAFE2_CNTXT(process_TCN, N, level)
  382. FOAV_SAFE_CNTXT(process_TCN, block_size)
  383. uint64_t comparator_dist = (1<<level);
  384. // bfr_fop = bfr_first_operand_pointer, bfr_sop = bfr_second_operand_pointer
  385. unsigned char *bfr_fop = bfr_ptr;
  386. unsigned char *bfr_sop = bfr_ptr + (comparator_dist * block_size);
  387. // Number of oblivious swaps
  388. uint64_t num_oswaps = N - comparator_dist;
  389. uint64_t sop_index = comparator_dist;
  390. uint64_t fop_index = 0;
  391. FOAV_SAFE_CNTXT(process_TCN, num_oswaps)
  392. for(uint64_t i=0; i<num_oswaps; i++){
  393. uint64_t move_dist = LS_distance[sop_index] & (1 << (level+1)-1);
  394. // Obliviously if sop!=dummy AND move_dist!=0, set move_flag
  395. uint8_t dist_flag = ogt_set_flag(move_dist, 0);
  396. // but appropriate for 8 bytes oswaps.
  397. oswap_buffer<oswap_style>(bfr_fop, bfr_sop, block_size, dist_flag);
  398. // Adjust LS_distance after an oswap based on move_dist:
  399. // Obliviously if dist_flag, set LS_distance[thread][fop_index] to
  400. // (LS_distance[thread][sop_index]-move_dist)
  401. LS_distance[sop_index]-= move_dist;
  402. oset_value(&(LS_distance[fop_index]), LS_distance[sop_index], dist_flag);
  403. oset_value(&(LS_distance[sop_index]), 0, dist_flag);
  404. bfr_fop+=block_size;
  405. bfr_sop+=block_size;
  406. sop_index++;
  407. fop_index++;
  408. FOAV_SAFE2_CNTXT(process_TCN, i, num_oswaps)
  409. }
  410. }
  411. template <OSwap_Style oswap_style>
  412. void OP_TightCompact(unsigned char *buf, uint64_t N, size_t block_size, bool *selected_list){
  413. FOAV_SAFE2_CNTXT(OP_TightCompact, N, block_size)
  414. FOAV_SAFE2_CNTXT(OP_TightCompact, buf, selected_list)
  415. uint64_t *LS_distance = new uint64_t[N];
  416. int TCN_l = calculatelog2(N);
  417. compute_LS_distances(N, buf, block_size, selected_list, LS_distance);
  418. FOAV_SAFE_CNTXT(OP_TightCompact, TCN_l)
  419. for(int l=0; l<TCN_l; l++) {
  420. process_TCN<oswap_style>(N, l, buf, block_size, LS_distance);
  421. FOAV_SAFE2_CNTXT(OP_TightCompact, l, TCN_l)
  422. }
  423. delete[] LS_distance;
  424. }
  425. #endif
  426. #endif