#ifndef LOWMC_BLOCK_H__
#define LOWMC_BLOCK_H__

#include <bitset>      // std::bitset
#include <cstddef>     // std::size_t
#include <cstdint>     // std::uint64_t
#include <iostream>    // std::istream and std::ostream
#include <string>      // std::string
#include <immintrin.h> // SSE and AVX intrinsics

// namespace lowmc
// {

// Fixed-width bit block that overlays a SIMD register value (`mX`) with a
// std::bitset of the same number of bits (`bits`).
//
// VecT is the SIMD register type; this header provides operator
// specializations for __m128i and __m256i only.
//
// NOTE(review): both members share storage because this is a union. The code
// initialises one member and reads the other (e.g. the integer constructor
// fills `bits`, the conversion operator reads `mX`), which depends on the
// compiler supporting type punning through unions - confirm for every
// toolchain this header is built with.
template<typename VecT>
union block
{
public:
  typedef VecT value_type;

  // Builds the block from the bit pattern of an integer (default: all zero).
  block(const std::uint64_t input = 0ULL) : bits(input) { }

  // Wraps a raw SIMD value. Intentionally implicit: the free operators below
  // return raw intrinsic results that are converted through this constructor.
  block(const VecT & val) : mX(val) { }

  // Builds the block from a string of '0'/'1' characters, as parsed by the
  // std::bitset string constructor.
  block(const std::string & bit_string) : bits(bit_string) { }

  // Implicit conversion to the raw SIMD value, so a block can be passed
  // directly to intrinsics.
  inline operator VecT() const { return mX; }

  inline block<VecT> & operator=(const VecT & val)
  {
    mX = val;
    return *this;
  }

  // Bitwise equality; defined per register type further down.
  inline bool operator==(const VecT & rhs) const;
  inline bool operator!=(const VecT & rhs) const { return !(*this == rhs); }

  // Single-bit access through the bitset view.
  inline typename std::bitset<8 * sizeof(VecT)>::reference operator[](const std::size_t pos)
  {
    return bits[pos];
  }
  inline bool operator[](const std::size_t pos) const { return bits[pos]; }

  // Width of the block in bits.
  constexpr inline std::size_t size() const { return sizeof(VecT) * 8; }

  // Returns 1 if an odd number of bits is set, 0 otherwise.
  inline unsigned parity() const { return static_cast<unsigned>(bits.count() % 2); }

  // In-place shifts performed on the bitset view.
  inline void shiftr(const std::size_t pos) { bits >>= pos; }
  inline void shiftl(const std::size_t pos) { bits <<= pos; }

  std::bitset<8 * sizeof(VecT)> bits;

//private:
  // Copies an existing bitset. Takes a const reference so that const bitsets
  // and temporaries are accepted as well.
  block(const std::bitset<8 * sizeof(VecT)> & other) : bits(other) { }

  VecT mX;
};

// ---------------------------------------------------------------------------
// Equality: XOR both values and test whether the result is all zero.
// ---------------------------------------------------------------------------

template<>
inline bool block<__m128i>::operator==(const __m128i & rhs) const
{
  const __m128i diff = _mm_xor_si128(*this, rhs);
  return _mm_testz_si128(diff, diff) != 0;
}

template<>
inline bool block<__m256i>::operator==(const __m256i & rhs) const
{
  const __m256i diff = _mm256_xor_si256(*this, rhs);
  return _mm256_testz_si256(diff, diff) != 0;
}

// ---------------------------------------------------------------------------
// Bitwise OR
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator|(const block<VecT> & block1, const block<VecT> & block2);

template<>
inline block<__m256i> operator|(const block<__m256i> & block1, const block<__m256i> & block2)
{
  return _mm256_or_si256(block1, block2);
}

template<>
inline block<__m128i> operator|(const block<__m128i> & block1, const block<__m128i> & block2)
{
  return _mm_or_si128(block1, block2);
}

// ---------------------------------------------------------------------------
// Bitwise AND
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator&(const block<VecT> & block1, const block<VecT> & block2);

template<>
inline block<__m256i> operator&(const block<__m256i> & block1, const block<__m256i> & block2)
{
  return _mm256_and_si256(block1, block2);
}

template<>
inline block<__m128i> operator&(const block<__m128i> & block1, const block<__m128i> & block2)
{
  return _mm_and_si128(block1, block2);
}

// ---------------------------------------------------------------------------
// Bitwise XOR
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator^(const block<VecT> & block1, const block<VecT> & block2);

template<>
inline block<__m256i> operator^(const block<__m256i> & block1, const block<__m256i> & block2)
{
  return _mm256_xor_si256(block1, block2);
}

template<>
inline block<__m128i> operator^(const block<__m128i> & block1, const block<__m128i> & block2)
{
  return _mm_xor_si128(block1, block2);
}

template<typename VecT>
inline block<VecT> & operator^=(block<VecT> & block1, const block<VecT> & block2);

template<>
inline block<__m256i> & operator^=(block<__m256i> & block1, const block<__m256i> & block2)
{
  block1 = _mm256_xor_si256(block1, block2);
  return block1;
}

template<>
inline block<__m128i> & operator^=(block<__m128i> & block1, const block<__m128i> & block2)
{
  block1 = _mm_xor_si128(block1, block2);
  return block1;
}

// ---------------------------------------------------------------------------
// Bitwise NOT, expressed as XOR with an all-ones value so that it only uses
// intrinsics (no compiler-specific operators on the raw register types).
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator~(const block<VecT> & value);

template<>
inline block<__m256i> operator~(const block<__m256i> & value)
{
  return _mm256_xor_si256(value, _mm256_set1_epi32(-1));
}

template<>
inline block<__m128i> operator~(const block<__m128i> & value)
{
  return _mm_xor_si128(value, _mm_set1_epi32(-1));
}

// ---------------------------------------------------------------------------
// Left shift across the whole block. `shift` must be non-negative.
//
// The SIMD path shifts each 64-bit lane and ORs in the bits carried over from
// the neighbouring lower lane. It can only carry across one lane boundary, so
// counts of 64 or more are delegated to the bitset view.
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator<<(const block<VecT> & value, const long & shift);

template<>
inline block<__m256i> operator<<(const block<__m256i> & value, const long & shift)
{
  if (shift >= 64)
  {
    block<__m256i> shifted(value);
    shifted.shiftl(static_cast<std::size_t>(shift));
    return shifted;
  }

  const __m256i lanes = value;
  const int count = static_cast<int>(shift);
  // Bits leaving the top of each lane, moved one lane up (lane 0 is a dummy).
  const __m256i carry = _mm256_permute4x64_epi64(_mm256_srli_epi64(lanes, 64 - count),
                                                 _MM_SHUFFLE(2,1,0,0));
  // The blend mask keeps lanes 1..3 of `carry` and zeroes lane 0.
  return _mm256_or_si256(_mm256_slli_epi64(lanes, count),
                         _mm256_blend_epi32(_mm256_setzero_si256(), carry, _MM_SHUFFLE(3,3,3,0)));
}

template<>
inline block<__m128i> operator<<(const block<__m128i> & value, const long & shift)
{
  if (shift >= 64)
  {
    block<__m128i> shifted(value);
    shifted.shiftl(static_cast<std::size_t>(shift));
    return shifted;
  }

  const __m128i lanes = value;
  const int count = static_cast<int>(shift);
  // The byte shift moves the low lane into the high lane; its top `count`
  // bits are the carry into the high lane.
  const __m128i carry = _mm_srli_epi64(_mm_slli_si128(lanes, 8), 64 - count);
  return _mm_or_si128(_mm_slli_epi64(lanes, count), carry);
}

template<typename VecT>
inline block<VecT> & operator<<=(block<VecT> & target, const long & shift)
{
  target = target << shift;
  return target;
}

// ---------------------------------------------------------------------------
// Right shift across the whole block. `shift` must be non-negative.
// Mirrors operator<<: SIMD path below 64, bitset view for 64 or more.
// ---------------------------------------------------------------------------

template<typename VecT>
inline block<VecT> operator>>(const block<VecT> & value, const long & shift);

template<>
inline block<__m256i> operator>>(const block<__m256i> & value, const long & shift)
{
  if (shift >= 64)
  {
    block<__m256i> shifted(value);
    shifted.shiftr(static_cast<std::size_t>(shift));
    return shifted;
  }

  const __m256i lanes = value;
  const int count = static_cast<int>(shift);
  // Bits leaving the bottom of each lane, moved one lane down (lane 3 is a dummy).
  const __m256i carry = _mm256_permute4x64_epi64(_mm256_slli_epi64(lanes, 64 - count),
                                                 _MM_SHUFFLE(0,3,2,1));
  // The blend mask keeps lanes 0..2 of `carry` and zeroes lane 3.
  return _mm256_or_si256(_mm256_srli_epi64(lanes, count),
                         _mm256_blend_epi32(_mm256_setzero_si256(), carry, _MM_SHUFFLE(0,3,3,3)));
}

template<>
inline block<__m128i> operator>>(const block<__m128i> & value, const long & shift)
{
  if (shift >= 64)
  {
    block<__m128i> shifted(value);
    shifted.shiftr(static_cast<std::size_t>(shift));
    return shifted;
  }

  const __m128i lanes = value;
  const int count = static_cast<int>(shift);
  // The byte shift moves the high lane into the low lane; its bottom `count`
  // bits are the carry into the low lane.
  const __m128i carry = _mm_slli_epi64(_mm_srli_si128(lanes, 8), 64 - count);
  return _mm_or_si128(_mm_srli_epi64(lanes, count), carry);
}

template<typename VecT>
inline block<VecT> & operator>>=(block<VecT> & target, const long & shift)
{
  target = target >> shift;
  return target;
}

//} // namespace lowmc

#endif // LOWMC_BLOCK_H__