// bitutils.hpp
  /* Adapted from preprocessing/bitutils.h from
   * https://git-crysp.uwaterloo.ca/avadapal/duoram by Adithya Vadapalli,
   * itself adapted from code by Ryan Henry */
  4. #ifndef __BITUTILS_HPP__
  5. #define __BITUTILS_HPP__
  6. #include <x86intrin.h> // SSE and AVX intrinsics
  // Two single-bit constants, one per 64-bit half of a 128-bit value:
  //   bool128_mask[0] has only bit 0 set  (0b00...0001),
  //   bool128_mask[1] has only bit 64 set (0b00...0001 << 64).
  // Lanes are listed most-significant first, as _mm_set_epi32 expects.
  static const __m128i bool128_mask[2] = {
      _mm_set_epi32(0, 0, 0, 1),
      _mm_set_epi32(0, 1, 0, 0)
  };
  // lsb128_mask[k], for k in 0..3, is the 128-bit value whose two lowest
  // bits spell k and whose remaining 126 bits are zero.
  static const __m128i lsb128_mask[4] = {
      _mm_set_epi32(0, 0, 0, 0),  // 0b00...0000
      _mm_set_epi32(0, 0, 0, 1),  // 0b00...0001
      _mm_set_epi32(0, 0, 0, 2),  // 0b00...0010
      _mm_set_epi32(0, 0, 0, 3)   // 0b00...0011
  };
  // lsb128_mask_inv[k], for k in 0..3, is the bitwise complement of the
  // 128-bit value k: every bit is set except those that are set in k.
  static const __m128i lsb128_mask_inv[4] = {
      _mm_set1_epi32(-1),             // 0b11...1111
      _mm_set_epi32(-1, -1, -1, -2),  // 0b11...1110
      _mm_set_epi32(-1, -1, -1, -3),  // 0b11...1101
      _mm_set_epi32(-1, -1, -1, -4)   // 0b11...1100
  };
  // Full-width select masks: if128_mask[0] is all zero bits and
  // if128_mask[1] is all one bits.
  static const __m128i if128_mask[2] = {
      _mm_set_epi64x(0, 0),    // 0b00...0000
      _mm_set_epi64x(-1, -1)   // 0b11...1111
  };
  // Masked XOR: returns block1 ^ (block2 & flag).
  // Each bit of block2 is folded into block1 only where the matching bit
  // of flag is set; where flag is clear, block1's bit is returned unchanged.
  inline __m128i xor_if(const __m128i & block1, const __m128i & block2, __m128i flag)
  {
      const __m128i selected = _mm_and_si128(flag, block2);
      return _mm_xor_si128(selected, block1);
  }
  31. inline __m128i xor_if(const __m128i & block1, const __m128i & block2, bool flag)
  32. {
  33. return _mm_xor_si128(block1, _mm_and_si128(block2, if128_mask[flag ? 1 : 0]));
  34. }
  35. inline uint8_t get_lsb(const __m128i & block, uint8_t bits = 0b01)
  36. {
  37. __m128i vcmp = _mm_xor_si128(_mm_and_si128(block, lsb128_mask[bits]), lsb128_mask[bits]);
  38. return static_cast<uint8_t>(_mm_testz_si128(vcmp, vcmp));
  39. }
  40. inline __m128i clear_lsb(const __m128i & block, uint8_t bits = 0b01)
  41. {
  42. return _mm_and_si128(block, lsb128_mask_inv[bits]);
  43. }
  44. inline __m128i set_lsb(const __m128i & block, const bool val = true)
  45. {
  46. return _mm_or_si128(clear_lsb(block, 0b01), lsb128_mask[val ? 0b01 : 0b00]);
  47. }
  48. #endif