| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338 | #ifndef __SORTINGNETWORK_TCC__#define __SORTINGNETWORK_TCC__// Merge operation for odd-even mergesort. Takes number of spaces to "skip" between items in buffer.// Left and right parts must be sorted, with left size a power of two and right size smaller.// Merges them to return a sorted result.template<OSwap_Style oswap_style>void OddEvenMerge(unsigned char *buf, uint64_t skip, uint64_t N, size_t block_size) {  unsigned char *block1;  unsigned char *block2;  FOAV_SAFE_CNTXT(OddEvenMerge, N)  if (N < 2) {    return;  }  FOAV_SAFE_CNTXT(OddEvenMerge, N)  if (N == 2) {    block1 = buf;    block2 = buf + block_size + (block_size*skip);    oswap_buffer<oswap_style>(block1, block2, block_size,           ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2)));    return;  }  // Merge odd items  OddEvenMerge<oswap_style>(buf, 2*skip+1, (N/2)+(N%2), block_size);  // Merge even items  OddEvenMerge<oswap_style>(buf+block_size+(block_size*skip), 2*skip+1, N/2, block_size);  // Compare-and-swap subsequent item pairs, skipping first item  block2 = buf;  FOAV_SAFE_CNTXT(OddEvenMerge, N)  for (int i=0; i<(N-1)/2; i++) {    FOAV_SAFE_CNTXT(OddEvenMerge, i)    block1 = block2 + block_size + (block_size*skip);    block2 = block1 + block_size + (block_size*skip);    oswap_buffer<oswap_style>(block1, block2, block_size,        ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2)));      }}template<OSwap_Style oswap_style>void OddEvenMergeSort(unsigned char *buf, uint64_t N, size_t block_size) {  // Perform compare-and-swaps  unsigned char *block1 = buf;  unsigned char *block2 = buf + block_size;  FOAV_SAFE_CNTXT(OddEvenMerge, N)  if (N < 2) {    return;  }  FOAV_SAFE_CNTXT(OddEvenMerge, N)  if (N == 2) {    bool swap_flag = ogt_set_flag(*((uint64_t *) block1), *((uint64_t *) block2));    oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);        return;  }  // Divide into maximum power of two and remainder  uint64_t N1 = pow2_lt(N);  uint64_t N2 = N - N1;  // Recursively sort left and right parts  OddEvenMergeSort<oswap_style>(buf, N1, block_size);  OddEvenMergeSort<oswap_style>(buf + block_size*N1, N2, block_size);  // Apply merge operation to complete sort  OddEvenMerge<oswap_style>(buf, 0, N, block_size);}template<OSwap_Style oswap_style, typename KeyType>void BitonicMerge(unsigned char *buffer, size_t N, size_t block_size, bool ascend) {  FOAV_SAFE2_CNTXT(BitonicMerge, N, block_size)  if(N<2){    return;  }  else if((N & (N * -1))!=N) {    size_t M = pow2_lt(N);    unsigned char *block1 = buffer;    unsigned char *block2 = buffer + (M * block_size);     size_t feasible_swaps = N - M;    FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, M)    for(size_t i=0; i<feasible_swaps; i++) {      FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, i)      uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);      FOAV_SAFE_CNTXT(BitonicMerge, ascend)      if(ascend){        oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);      } else {        oswap_buffer<oswap_style>(block1, block2, block_size, !swap_flag);      }      block1+=block_size;      block2+=block_size;       FOAV_SAFE2_CNTXT(BitonicMerge, feasible_swaps, i)    }     BitonicMerge<oswap_style, KeyType>(buffer, M, block_size, ascend);    BitonicMerge<oswap_style, KeyType>(buffer + (M * block_size), N-M, block_size, ascend);  }   else{ //Power of 2 case    size_t split = N/2;    unsigned char *block1 = buffer;    unsigned char *block2 = buffer + (split * block_size);         FOAV_SAFE_CNTXT(BitonicSort, split)    for(size_t i=0; i<split; i++) {    FOAV_SAFE_CNTXT(BitonicSort, i)    FOAV_SAFE_CNTXT(BitonicSort, split)      uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);      FOAV_SAFE_CNTXT(BitonicSort, ascend)      if(ascend){        oswap_buffer<oswap_style>(block1, block2, block_size, swap_flag);        //ogt_comp_swap((uint64_t *) block1, (uint64_t *) block2, block1, block2, block_size);      } else {        oswap_buffer<oswap_style>(block1, block2, block_size, !swap_flag);        //ogt_comp_swap((uint64_t *) block2, (uint64_t *) block1, block2, block1, block_size);      }      block1+=block_size;      block2+=block_size;     }     BitonicMerge<oswap_style, KeyType>(buffer, split, block_size, ascend);    BitonicMerge<oswap_style, KeyType>(buffer + (split * block_size), split, block_size, ascend);  }}template<OSwap_Style oswap_style, typename KeyType = uint64_t>void BitonicSort(unsigned char *buffer, size_t N, size_t block_size, bool ascend) {  FOAV_SAFE_CNTXT(BitonicSort, N)  if(N < 2){    return;  }  else {  // Handle non-power of 2 case:    size_t N1 = N/2;     BitonicSort<oswap_style, KeyType>(buffer, N1, block_size, !ascend);    BitonicSort<oswap_style, KeyType>(buffer + (block_size * N1), N-N1, block_size, ascend);    BitonicMerge<oswap_style, KeyType>(buffer, N, block_size, ascend);  }}template<OSwap_Style oswap_style, typename KeyType>void BitonicMerge(unsigned char *keys, size_t N, unsigned char *associated_data1,       unsigned char *associated_data2, size_t data_size, bool ascend) {  if(associated_data1==NULL) {    if(N<2){      return;    }    else if((N & (N * -1))!=N) {      size_t M = pow2_lt(N);      unsigned char *block1 = keys;      unsigned char *block2 = keys + (M * sizeof(KeyType));      size_t feasible_swaps = N - M;      for(size_t i=0; i<feasible_swaps; i++) {        uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);        if(ascend){          oswap_buffer<oswap_style>(block1, block2, data_size, swap_flag);        } else {          oswap_buffer<oswap_style>(block1, block2, data_size, !swap_flag);        }        block1+=data_size;        block2+=data_size;       }         BitonicMerge<oswap_style, KeyType>(keys, M, associated_data1, associated_data2, data_size, ascend);      BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1, associated_data2, data_size, ascend);    }     else{ //Power of 2 case      size_t split = N/2;      unsigned char *block1 = keys;      unsigned char *block2 = keys + (split * sizeof(KeyType));             for(size_t i=0; i<split; i++) {        uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);        if(ascend){          oswap_buffer<oswap_style>(block1, block2, data_size, swap_flag);          //ogt_comp_swap((uint64_t *) block1, (uint64_t *) block2, block1, block2, block_size);        } else {          oswap_buffer<oswap_style>(block1, block2, data_size, !swap_flag);          //ogt_comp_swap((uint64_t *) block2, (uint64_t *) block1, block2, block1, block_size);        }        block1+=data_size;        block2+=data_size;       }       BitonicMerge<oswap_style, KeyType>(keys, split, data_size, ascend);      BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), split, data_size, ascend);    }  } else{    if(N<2){      return;    }    else if((N & (N * -1))!=N) {      size_t M = pow2_lt(N);      unsigned char *block1 = keys;      unsigned char *block2 = keys + (M * sizeof(KeyType));      size_t feasible_swaps = N - M;      unsigned char *adata1_l = associated_data1;      unsigned char *adata1_r = associated_data1 + (M * data_size);      unsigned char *adata2_l = associated_data2;      unsigned char *adata2_r = associated_data2;            if(associated_data2!=NULL) {        adata2_r = associated_data2 + (M * data_size);      }      for(size_t i=0; i<feasible_swaps; i++) {        uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);        if(ascend){          oswap_key<KeyType>(block1, block2, swap_flag);          oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, swap_flag);          if(associated_data2!=NULL){            oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, swap_flag);          }        } else {          oswap_key<KeyType>(block1, block2, !swap_flag);          oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, !swap_flag);          if(associated_data2!=NULL){            oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, !swap_flag);          }        }        block1+=sizeof(KeyType);        block2+=sizeof(KeyType);        adata1_l+=data_size;        adata1_r+=data_size;        if(associated_data2!=NULL){          adata2_l+=data_size;          adata2_r+=data_size;        }      }         BitonicMerge<oswap_style, KeyType>(keys, M, associated_data1, associated_data2, data_size, ascend);      if(associated_data2==NULL)        BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1 + (M*data_size), associated_data2, data_size, ascend);      else        BitonicMerge<oswap_style, KeyType>(keys + (M * sizeof(KeyType)), N-M, associated_data1 + (M*data_size), associated_data2 + (M*data_size), data_size, ascend);    }     else{ //Power of 2 case      size_t split = N/2;      unsigned char *block1 = keys;      unsigned char *block2 = keys + (split * sizeof(KeyType));       unsigned char *adata1_l = associated_data1;      unsigned char *adata1_r = associated_data1 + (split * data_size);      unsigned char *adata2_l = associated_data2;      unsigned char *adata2_r = associated_data2;            if(associated_data2!=NULL) {        adata2_r = associated_data2 + (split * data_size);      }            for(size_t i=0; i<split; i++) {        uint8_t swap_flag = ogt<KeyType>((KeyType*)block1, (KeyType*)block2);        if(ascend){          oswap_key<KeyType>(block1, block2, swap_flag);          oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, swap_flag);          if(associated_data2!=NULL){            oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, swap_flag);          }        } else {          oswap_key<KeyType>(block1, block2, !swap_flag);          oswap_buffer<oswap_style>(adata1_l, adata1_r, data_size, !swap_flag);          if(associated_data2!=NULL){            oswap_buffer<oswap_style>(adata2_l, adata2_r, data_size, !swap_flag);          }        }        block1+=sizeof(KeyType);        block2+=sizeof(KeyType);         adata1_l+=data_size;        adata1_r+=data_size;        if(associated_data2!=NULL){          adata2_l+=data_size;          adata2_r+=data_size;        }      }       BitonicMerge<oswap_style, KeyType>(keys, split, associated_data1, associated_data2, data_size, ascend);      if(associated_data2==NULL)        BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), N-split, associated_data1 + (split*data_size), associated_data2, data_size, ascend);      else        BitonicMerge<oswap_style, KeyType>(keys + (split * sizeof(KeyType)), N-split, associated_data1 + (split*data_size), associated_data2 + (split*data_size), data_size, ascend);    }  }}template<OSwap_Style oswap_style, typename KeyType = uint64_t>void BitonicSort(unsigned char *keys, size_t N, unsigned char *associated_data1,       unsigned char *associated_data2, size_t data_size, bool ascend) {  FOAV_SAFE_CNTXT(BitonicSort, N)  if(N < 2){    return;  }  else {    size_t N1 = N/2;    FOAV_SAFE_CNTXT(BitonicSort, associated_data1)    FOAV_SAFE_CNTXT(BitonicSort, associated_data2)    if(associated_data1==NULL){      BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,                    data_size, !ascend);      // Increment keys by N1 data_size blocks, since keys holds the entire buffer to sort.      BitonicSort<oswap_style, KeyType>(keys + (N1*data_size), N-N1, associated_data1,                    associated_data2, data_size, ascend);      BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,                    data_size, ascend);    } else if(associated_data2==NULL){      //There is only one associated_data list.      BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,                    data_size, !ascend);      BitonicSort<oswap_style, KeyType>(keys + (N1*sizeof(KeyType)), N-N1, associated_data1 + (N1*data_size),                    associated_data2, data_size, ascend);      BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,                    data_size, ascend);      FOAV_SAFE_CNTXT(BitonicSort, associated_data1)      FOAV_SAFE_CNTXT(BitonicSort, associated_data2)    } else {       //Both associated_data lists.      BitonicSort<oswap_style, KeyType>(keys, N1, associated_data1, associated_data2,                    data_size, !ascend);      BitonicSort<oswap_style, KeyType>(keys + (N1*sizeof(KeyType)), N-N1, associated_data1 + (N1*data_size),                    associated_data2 + (N1*data_size), data_size, ascend);      BitonicMerge<oswap_style, KeyType>(keys, N, associated_data1, associated_data2,                    data_size, ascend);    }  }  }#endif
 |