bitutils.h 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /* Copyright (C) 2019 Anonymous
  2. *
  3. * This is a pre-release version of the DPF++ library distributed anonymously
  4. * for peer review. A public release of the software will be published under the
  5. * LGPL v2.1 license in the near future. Please do not redistribute this version
  6. * of the software.
  7. */
  8. #ifndef DPF_BITUTILS_H__
  9. #define DPF_BITUTILS_H__
  #include <array>       // std::array
  #include <bitset>      // std::bitset
  #include <cstddef>     // std::size_t
  #include <cstdint>     // uint8_t
  #include <x86intrin.h> // SSE and AVX intrinsics
  12. namespace dpf
  13. {
  // Single-bit masks for a 128-bit block, one per 64-bit lane:
  // entry 0 has only bit 0 set, entry 1 has only bit 64 set.
  static const __m128i bool128_mask[2] = {
      _mm_set_epi32(0, 0, 0, 1), // 1 in the low 64-bit lane
      _mm_set_epi32(0, 1, 0, 0)  // 1 in the high 64-bit lane (1 << 64)
  };
  // Single-bit masks for a 256-bit block, one per 64-bit lane:
  // entry k has only bit 64*k set.
  static const __m256i bool256_mask[4] = {
      _mm256_set_epi64x(0, 0, 0, 1), // bit 0
      _mm256_set_epi64x(0, 0, 1, 0), // bit 64
      _mm256_set_epi64x(0, 1, 0, 0), // bit 128
      _mm256_set_epi64x(1, 0, 0, 0)  // bit 192
  };
  // Masks selecting the two least-significant bits of a 128-bit block.
  // Entry i holds the value i (0..3) in the lowest lane and zero elsewhere.
  static const __m128i lsb128_mask[4] = {
      _mm_set_epi32(0, 0, 0, 0), // 0b00: selects nothing
      _mm_set_epi32(0, 0, 0, 1), // 0b01: bit 0
      _mm_set_epi32(0, 0, 0, 2), // 0b10: bit 1
      _mm_set_epi32(0, 0, 0, 3)  // 0b11: bits 0 and 1
  };
  // Bitwise complements of the lsb128_mask entries: entry i has every bit set
  // except the bits of i (0..3) in the lowest lane.
  static const __m128i lsb128_mask_inv[4] = {
      _mm_set_epi64x(-1LL, ~0LL), // keeps every bit
      _mm_set_epi64x(-1LL, ~1LL), // drops bit 0
      _mm_set_epi64x(-1LL, ~2LL), // drops bit 1
      _mm_set_epi64x(-1LL, ~3LL)  // drops bits 0 and 1
  };
  // Selection masks for the bool overload of xor_if on 128-bit blocks:
  // entry 0 is all zeros, entry 1 is all ones.
  static const __m128i if128_mask[2] = {
      _mm_set_epi64x(0, 0),  // false: no bits pass through
      _mm_set_epi64x(-1, -1) // true: every bit passes through
  };
  // Masks selecting the two least-significant bits of a 256-bit block.
  // Entry i holds the value i (0..3) in the lowest 64-bit lane and zero elsewhere.
  static const __m256i lsb256_mask[4] = {
      _mm256_set_epi64x(0, 0, 0, 0), // 0b00: selects nothing
      _mm256_set_epi64x(0, 0, 0, 1), // 0b01: bit 0
      _mm256_set_epi64x(0, 0, 0, 2), // 0b10: bit 1
      _mm256_set_epi64x(0, 0, 0, 3)  // 0b11: bits 0 and 1
  };
  // Bitwise complements of the lsb256_mask entries: entry i has every bit set
  // except the bits of i (0..3) in the lowest 64-bit lane.
  static const __m256i lsb256_mask_inv[4] = {
      _mm256_set_epi64x(-1LL, -1LL, -1LL, ~0LL), // keeps every bit
      _mm256_set_epi64x(-1LL, -1LL, -1LL, ~1LL), // drops bit 0
      _mm256_set_epi64x(-1LL, -1LL, -1LL, ~2LL), // drops bit 1
      _mm256_set_epi64x(-1LL, -1LL, -1LL, ~3LL)  // drops bits 0 and 1
  };
  // Selection masks for the bool overload of xor_if on 256-bit blocks:
  // entry 0 is all zeros, entry 1 is all ones.
  static const __m256i if256_mask[2] = {
      _mm256_setzero_si256(), // false: no bits pass through
      _mm256_set1_epi8(-1)    // true: every bit passes through
  };
  // Masked XOR on 128-bit blocks: returns block1 ^ (block2 & flag).
  // Only the bit positions where flag is 1 receive the corresponding bit of block2.
  inline __m128i xor_if(const __m128i & block1, const __m128i & block2, __m128i flag)
  {
      const __m128i selected = _mm_and_si128(block2, flag);
      return _mm_xor_si128(block1, selected);
  }
  // Masked XOR on 256-bit blocks: returns block1 ^ (block2 & flag).
  // Only the bit positions where flag is 1 receive the corresponding bit of block2.
  inline __m256i xor_if(const __m256i & block1, const __m256i & block2, __m256i flag)
  {
      const __m256i selected = _mm256_and_si256(block2, flag);
      return _mm256_xor_si256(block1, selected);
  }
  64. inline __m128i xor_if(const __m128i & block1, const __m128i & block2, bool flag)
  65. {
  66. return _mm_xor_si128(block1, _mm_and_si128(block2, if128_mask[flag ? 1 : 0]));
  67. }
  68. inline __m256i xor_if(const __m256i & block1, const __m256i & block2, bool flag)
  69. {
  70. return _mm256_xor_si256(block1, _mm256_and_si256(block2, if256_mask[flag ? 1 : 0]));
  71. }
  72. inline uint8_t get_lsb(const __m128i & block, uint8_t bits = 0b01)
  73. {
  74. __m128i vcmp = _mm_xor_si128(_mm_and_si128(block, lsb128_mask[bits]), lsb128_mask[bits]);
  75. return static_cast<uint8_t>(_mm_testz_si128(vcmp, vcmp));
  76. }
  77. inline uint8_t get_lsb(const __m256i & block, uint8_t bits = 0b01)
  78. {
  79. __m256i vcmp = _mm256_xor_si256(_mm256_and_si256(block, lsb256_mask[bits]), lsb256_mask[bits]);
  80. return static_cast<uint8_t>(_mm256_testz_si256(vcmp, vcmp));
  81. }
  // Shorthand for get_lsb(block, 0b01): forwards to the get_lsb overload for BlockT.
  template <typename BlockT>
  inline uint8_t get_lsb01(const BlockT & block)
  {
      return get_lsb(block, 0b01);
  }
  // Shorthand for get_lsb(block, 0b10): forwards to the get_lsb overload for BlockT.
  template <typename BlockT>
  inline uint8_t get_lsb10(const BlockT & block)
  {
      return get_lsb(block, 0b10);
  }
  86. inline __m128i clear_lsb(const __m128i & block, uint8_t bits = 0b01)
  87. {
  88. return _mm_and_si128(block, lsb128_mask_inv[bits]);
  89. }
  90. inline __m256i clear_lsb(const __m256i & block, uint8_t bits = 0b01)
  91. {
  92. return _mm256_and_si256(block, lsb256_mask_inv[bits]);
  93. }
  94. // template<typename row_t = __m256i >
  95. // inline std::array<row_t, 128> bitsliced_clear_lsb(std::array<row_t, 128>& block, uint8_t bits = 0b11)
  96. // {
  97. // if(bits == 0b11)
  98. // {
  99. // block[0] = _mm_set_epi64x(0, 0);
  100. // block[1] = _mm_set_epi64x(0, 0);
  101. // }
  102. // if(bits == 0b01)
  103. // {
  104. // block[0] = _mm_set_epi64x(0, 0);
  105. // }
  106. // return block;
  107. // }
  // Picks one row out of an array of rows according to `bit`.
  //
  // row_t  type of a single row (defaults to __m256i)
  // nrows  number of rows in the array
  // block  the rows; taken by const reference so the whole array is not
  //        copied on every call
  // bit    0b10 selects block[1]; every other value, including the default
  //        0b01, selects block[0]
  //
  // Returns a copy of the selected row. Asking for 0b10 reads block[1], so
  // the array needs at least two rows in that case.
  template<typename row_t = __m256i, std::size_t nrows>
  inline row_t bitslicled_get_lsb(const std::array<row_t, nrows> & block, uint8_t bit = 0b01)
  {
      // The former 0b01 branch and the fallback branch both returned block[0],
      // so 0b10 is the only value that has to be told apart.
      return (bit == 0b10) ? block[1] : block[0];
  }
  // Shorthand for clear_lsb(block, 0b01): forwards to the clear_lsb overload for BlockT.
  template <typename BlockT>
  inline BlockT clear_lsb01(const BlockT & block)
  {
      return clear_lsb(block, 0b01);
  }
  // Shorthand for clear_lsb(block, 0b10): forwards to the clear_lsb overload for BlockT.
  template <typename BlockT>
  inline BlockT clear_lsb10(const BlockT & block)
  {
      return clear_lsb(block, 0b10);
  }
  // Shorthand for clear_lsb(block, 0b11): forwards to the clear_lsb overload for BlockT.
  template <typename BlockT>
  inline BlockT clear_lsb11(const BlockT & block)
  {
      return clear_lsb(block, 0b11);
  }
  // Overwrites a 128-bit block in place so that every bit is 1.
  inline void set_ones(__m128i & input)
  {
      const __m128i all_set = _mm_set1_epi8(-1);
      input = all_set;
  }
  // Overwrites a 256-bit block in place so that every bit is 1.
  inline void set_ones(__m256i & input)
  {
      const __m256i all_set = _mm256_set1_epi64x(-1);
      input = all_set;
  }
  // Overwrites a 128-bit block in place so that every bit is 0.
  inline void set_zeros(__m128i & input)
  {
      const __m128i all_clear = _mm_set_epi64x(0, 0);
      input = all_clear;
  }
  // Overwrites a 256-bit block in place so that every bit is 0.
  inline void set_zeros(__m256i & input)
  {
      const __m256i all_clear = _mm256_setzero_si256();
      input = all_clear;
  }
  146. // inline void zeros(block<__m128i> & input)
  147. // {
  148. // input = _mm_setzero_si128();
  149. // }
  150. // inline void zeros(block<__m256i> & input)
  151. // {
  152. // input = _mm256_setzero_si256();
  153. // }
  154. inline __m128i set_lsb(const __m128i & block, const bool val = true)
  155. {
  156. return _mm_or_si128(clear_lsb(block, 0b01), lsb128_mask[val ? 0b01 : 0b00]);
  157. }
  158. inline __m256i set_lsb(const __m256i & block, const bool val = true)
  159. {
  160. return _mm256_or_si256(clear_lsb(block, 0b01), lsb256_mask[val ? 0b01 : 0b00]);;
  161. }
  162. inline __m128i set_lsbs(const __m128i & block, const bool bits[2])
  163. {
  164. int i = (bits[0] ? 1 : 0) + 2 * (bits[1] ? 1 : 0);
  165. return _mm_or_si128(clear_lsb(block, 0b11), lsb128_mask[i]);
  166. }
  167. inline __m256i set_lsbs(const __m256i & block, const bool bits[2])
  168. {
  169. int i = (bits[0] ? 1 : 0) + 2 * (bits[1] ? 1 : 0);
  170. return _mm256_or_si256(clear_lsb(block, 0b11), lsb256_mask[i]);
  171. }
  172. } // namespace dpf
  173. #endif // DPF_BITUTILS_H__