/* Copyright (C) 2019 Anonymous * * This is a pre-release version of the DPF++ library distributed anonymously * for peer review. A public release of the software will be published under the * LPGL v2.1 license in the near future. Please do not redistribute this version * of the software. */ // #ifndef DPFPP_DPF_H__ // #define DPFPP_DPF_H__ // #include // std::is_same<> // #include // std::numeric_limits<> // #include // CHAR_BIT // #include // std::log2, std::ceil, std::floor // #include // std::runtime_error // #include // std::array<> // #include // std::istream and std::ostream // #include // std::vector<> // #include // std::shared_ptr<> // #include // std::move // #include // std::copy // #include // std::memcpy // #include // arc4random_buf // #include // SSE and AVX intrinsics // #include // #include "bitutils.h" // #include "../block.h" // #include "prg.h" // #include "prg_aes_impl.h" // constexpr int L = 0; // constexpr int R = 1; // const size_t ncores = 16; // uint64_t progress[ncores] = {0}; // namespace dpf // { // template // node_t init_val(uint64_t val); // template <> __m128i init_val(uint64_t val) { return _mm_set_epi64x(0, val); } // template <> __m256i init_val(uint64_t val) { return _mm256_set_epi64x(0, 0, 0, val); } // // template> // // struct dpf_key; // template // struct dpf_key; // template> // struct bitsliced_dpf_key; // using slicerow_t = block<__m256i>; // using sliceblock_t = std::array; // template // inline leaf_t eval(const dpf_key & dpfkey, const size_t input); // template // inline void evalinterval(const dpf_key & dpfkey, const size_t from, const size_t to, leaf_t * output, uint8_t * t = NULL); // template // inline void evalfull(const dpf_key & dpfkey, leaf_t * output, uint8_t * t = NULL); // template // static inline void expand(const prgkey_t & prgkey, const node_t & seed, node_t s[2], uint8_t t[2], int lsbmask = 0b11) // { // dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2); // t[L] = get_lsb(s[L]); // s[L] = clear_lsb(s[L], lsbmask); // t[R] = get_lsb(s[R]); // s[R] = clear_lsb(s[R], lsbmask); // } // dpf::expand // template // static inline void traverse2(const prgkey_t & prgkey, const node_t & seed, // const uint8_t cw_t[2], const node_t & cw, const uint8_t prev_t, // node_t s[2], uint8_t t[2], int lsbmask = 0b11) // { // dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2); // t[L] = get_lsb(s[L]) ^ (cw_t[L] & prev_t);; // s[L] = clear_lsb(xor_if(s[L], cw, !prev_t), lsbmask); // t[R] = get_lsb(s[R]) ^ (cw_t[R] & prev_t);; // s[R] = clear_lsb(xor_if(s[R], cw, !prev_t), lsbmask); // } // dpf::expand // template // static inline void traverse(const prgkey_t & prgkey, const node_t & seed, const bool direction, // const uint8_t cw_t, const node_t & cw, const uint8_t prev_t, // node_t & s, uint8_t & t, int lsbmask = 0b11) // { // dpf::PRG(prgkey, clear_lsb(seed, 0b11), &s, 1, direction); // t = get_lsb(s) ^ (cw_t & prev_t); // s = clear_lsb(xor_if(s, cw, !prev_t), lsbmask); // } // dpf::traverse // template // static inline void stretch_leaf(const prgkey_t & prgkey, const typename finalizer_t::value_type & seed, finalizer_t & s) // { // dpf::PRG(prgkey, clear_lsb(seed, 0b11), &s, s.size()); // } // dpf::stretch_leaf // template // struct dpf_key final // { // public: // static constexpr size_t bits_per_leaf = std::is_same::value ? 1 : sizeof(leaf_t) * CHAR_BIT; // static constexpr bool is_packed = (sizeof(leaf_t) < sizeof(node_t)); // static constexpr size_t leaves_per_node = dpf_key::is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1; // static constexpr size_t nodes_per_leaf = dpf_key::is_packed ? 1 : std::ceil(static_cast(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT)); // static_assert(leaves_per_node * bits_per_leaf == sizeof(node_t) * CHAR_BIT // || nodes_per_leaf * sizeof(node_t) == sizeof(leaf_t)); // using finalizer_t = std::array; // typedef std::pair (*finalizer_callback)(const prgkey_t &, const size_t, const leaf_t &, const node_t[2], const uint8_t[2]); // inline static constexpr size_t depth(const size_t nitems) { return std::ceil(std::log2(std::ceil(static_cast(nitems) / dpf_key::leaves_per_node))); } // inline constexpr size_t depth() const { return dpf_key::depth(nitems); } // inline static constexpr size_t input_bits(const size_t nitems) { return std::ceil(std::log2(nitems)); } // inline constexpr size_t input_bits() const { return dpf_key::input_bits(nitems); } // inline static constexpr size_t nodes_in_interval(const size_t from, const size_t to) { return (to < from) ? 0 : std::max(1.0, std::ceil(static_cast(to+1) / leaves_per_node) - std::floor(static_cast(from) / leaves_per_node)); } // inline static constexpr size_t interval_bytes(const size_t from, const size_t to) { return nodes_in_interval(from, to) * (is_packed ? sizeof(node_t) : sizeof(leaf_t)); } // inline constexpr size_t full_bytes() { return interval_bytes(0, nitems-1); } // inline static constexpr size_t nodes_at_leaf_layer(const size_t nitems) { return std::ceil(static_cast(nitems) / dpf_key::leaves_per_node); } // inline constexpr size_t nodes_at_leaf_layer() const { return dpf_key::nodes_at_leaf_layer(nitems); } // inline dpf_key(dpf_key &&) = default; // inline dpf_key & operator=(dpf_key &&) = default; // inline dpf_key(const dpf_key &) = default; // inline dpf_key & operator=(const dpf_key &) = default; // inline bool operator==(const dpf_key & rhs) const { return nitems == rhs.nitems && root == rhs.root && cw == rhs.cw && finalizer == rhs.finalizer; } // inline bool operator!=(const dpf_key & rhs) const { return !(*this == rhs); } // static auto default_make_finalizer(const prgkey_t & prgkey, const size_t target, const leaf_t & val, const node_t s[2], const uint8_t t[2]) // { // finalizer_t finalizer; // finalizer_t stretched[2]; // stretch_leaf(prgkey, s[L], stretched[L]); // stretch_leaf(prgkey, s[R], stretched[R]); // if constexpr(dpf_key::is_packed) // { // auto finalizer0 = reinterpret_cast(&finalizer[0]); // if constexpr(std::numeric_limits::is_integer) // { // if constexpr(std::is_same::value) // { // *finalizer0 = (node_t)init_val<__m128i>(val ? 1 : 0); // //*finalizer0 = init_val(val ? 1 : 0); // } // else // { // typedef typename std::make_unsigned_t unsigned_leaf_t; // *finalizer0 = init_val(static_cast(val)); // } // auto tmp = reinterpret_cast *>(finalizer0); // *tmp <<= bits_per_leaf * (target % leaves_per_node); // } // else // { // *finalizer0 = val; // } // } // else // { // std::memcpy(&finalizer[0], &val, sizeof(finalizer_t)); // } // for (size_t j = 0; j < nodes_per_leaf; ++j) // { // finalizer[j] ^= stretched[L][j] ^ stretched[R][j]; // } // return std::make_pair(finalizer, finalizer); // } // dpf_key::default_make_finalizer // static auto gen(const prgkey_t & prgkey, size_t nitems, size_t target, const leaf_t & val = 1, const finalizer_callback make_finalizer = default_make_finalizer) // { // if (nitems <= target) // { // throw std::runtime_error("target point out of range"); // } // node_t root[2]; // srand(1); // arc4random_buf(root, sizeof(root)); // root[0][0] = rand(); // root[0][1] = rand(); // root[1][0] = rand(); // root[1][1] = rand(); // uint8_t t[2] = { get_lsb(root[0]), !t[0] }; // root[1] = set_lsb(root[1], t[1]); // node_t s[2] = { root[0], root[1] }; // const size_t depth = dpf_key::depth(nitems); // std::vector cw; // cw.reserve(depth); // node_t s0[2], s1[2]; // uint8_t t0[2], t1[2]; // const size_t nbits = input_bits(nitems); // for (size_t layer = 0; layer < depth; ++layer) // { // const uint8_t bit = (target >> (nbits - layer - 1)) & 1U; // expand(prgkey, s[0], s0, t0); // expand(prgkey, s[1], s1, t1); // const uint8_t keep = (bit == 0) ? L : R, lose = 1 - keep; // bool cwt[2] = { // cwt[L] = t0[L] ^ t1[L] ^ bit ^ 1, // cwt[R] = t0[R] ^ t1[R] ^ bit // }; // auto nextcw = s0[lose] ^ s1[lose]; // s[L] = xor_if(s0[keep], nextcw, !t[L]); // t[L] = t0[keep] ^ (t[L] & cwt[keep]); // s[R] = xor_if(s1[keep], nextcw, !t[R]); // t[R] = t1[keep] ^ (t[R] & cwt[keep]); // cw.emplace_back(set_lsbs(nextcw, cwt)); // } // cw.shrink_to_fit(); // auto [finalizer0, finalizer1] = make_finalizer(prgkey, target, val, s, t); // return std::make_pair( // std::forward(dpf_key(nitems, root[0], cw, finalizer0, prgkey)), // std::forward(dpf_key(nitems, root[1], cw, finalizer1, prgkey))); // } // dpf_key::gen // inline leaf_t eval(const size_t input) const { return std::forward(dpf::eval(*this, input)); } // inline void evalinterval(const size_t from, const size_t to, leaf_t * output, uint8_t * t = NULL) const { dpf::evalinterval(*this, from, to, output, t); } // inline void evalfull(leaf_t * output, uint8_t * t = NULL) const { dpf::evalfull(*this, output, t); } // const size_t nitems; // const node_t root; // const std::vector cw; // const finalizer_t finalizer; // const prgkey_t prgkey; // private: // dpf_key(size_t nitems_, const node_t & root_, const std::vector cw_, // const finalizer_t & finalizer_, const prgkey_t & prgkey_) // : nitems(nitems_), // root(root_), // cw(cw_), // finalizer(finalizer_), // prgkey(prgkey_) { } //}; // struct dpf::dpf_key // template // inline leaf_t getword(const node_t & S, const size_t input) // { // auto S_ = reinterpret_cast(&S); // if constexpr(sizeof(leaf_t) >= sizeof(node_t)) return *S_; // return S_[input]; // } // dpf::getword // template<> // inline bool getword(const __m128i & S, const size_t input) // { // const __m128i mask = bool128_mask[input / 64]; // __m128i vcmp = _mm_xor_si128(_mm_and_si128(S >> (input % 64), mask), mask); // return static_cast(_mm_testz_si128(vcmp, vcmp)); // } // dpf::getword<__m128i,bool> // template<> // inline bool getword(const __m256i & S, const size_t input) // { // const __m256i mask = bool256_mask[input / 64]; // __m256i vcmp = _mm256_xor_si256(_mm256_and_si256(S >> (input % 64), mask), mask); // return static_cast(_mm256_testz_si256(vcmp, vcmp)); // } // dpf::getword<__m256i,bool> // template // inline void finalize(const prgkey_t & prgkey, std::array::nodes_per_leaf> finalizer, leaf_t * output, node_t * s, size_t nnodes, uint8_t * t) // { // auto output_ = reinterpret_cast::nodes_per_leaf> *>(output); // for (size_t i = 0; i < nnodes; ++i) // { // stretch_leaf(prgkey, s[i], output_[i]); // for (size_t j = 0; j < dpf_key::nodes_per_leaf; ++j) // { // output_[i][j] = xor_if(output_[i][j], finalizer[j], t[i]); // } // } // } // dpf::finalize // template // inline void __evalinterval(const dpf_key & dpfkey, const size_t from, const size_t to, leaf_t * output, uint8_t * _t) // { // auto nodes_per_leaf = dpfkey.nodes_per_leaf; // auto depth = dpfkey.depth(); // auto nbits = dpfkey.input_bits(); // auto nodes_in_interval = dpfkey.nodes_in_interval(from, to); // auto root = dpfkey.root; // auto prgkey = dpfkey.prgkey; // const size_t from_node = std::floor(static_cast(from) / nodes_per_leaf); // node_t * s[2] = { // reinterpret_cast(output) + nodes_in_interval * (nodes_per_leaf - 1), // s[0] + nodes_in_interval / 2 // }; // uint8_t * t[2] = { _t, _t + nodes_in_interval / 2}; // int curlayer = depth % 2; // s[curlayer][0] = root; // t[curlayer][0] = get_lsb(root, 0b01); // //printf("depth = %u\n", depth); // for (size_t layer = 0; layer < depth; ++layer) // { // auto & cw = dpfkey.cw[layer]; // uint8_t cw_t[2] = { get_lsb(cw, 0b01), get_lsb(cw, 0b10) }; // curlayer = 1-curlayer; // size_t i=0, j=0; // auto nextbit = (from_node >> (nbits-layer-1)) & 1; // size_t nodes_in_prev_layer = std::ceil(static_cast(nodes_in_interval) / (1ULL << (depth-layer))); // size_t nodes_in_cur_layer = std::ceil(static_cast(nodes_in_interval) / (1ULL << (depth-layer-1))); // //printf("nextbit = %u\n", (from_node >> (nbits-layer-1))); // if (nextbit == 1) traverse(prgkey, s[1-curlayer][0], R, cw_t[R], cw, t[1-curlayer][j], s[curlayer][0], t[curlayer][0]); // these will not be called in evalfull // for (i = nextbit, j = nextbit; j < nodes_in_prev_layer-1; ++j, i+=2) // { // //printf("j = %u\n", j ); // traverse2(prgkey, s[1-curlayer][j], cw_t, cw, t[1-curlayer][j], &s[curlayer][i], &t[curlayer][i]); // } // if (nodes_in_prev_layer > j) // { // //printf("jj' = %u\n", j ); // if (i < nodes_in_cur_layer - 1) // { // traverse2(prgkey, s[1-curlayer][j], cw_t, cw, t[1-curlayer][j], &s[curlayer][i], &t[curlayer][i]); // //printf("If\n"); // } // else // { // traverse(prgkey, s[1-curlayer][j], L, cw_t[L], cw, t[1-curlayer][j], s[curlayer][i], t[curlayer][i]); // will not be called in evalfull // // printf("else\n"); // } // } // } // finalize(prgkey, dpfkey.finalizer, output, s[0], nodes_in_interval, t[0]); // } // dpf::__evalinterval // template // inline void evalinterval(const dpf_key & dpfkey, const size_t from, const size_t to, leaf_t * output, uint8_t * t) // { // uint8_t * tt = t ? t : reinterpret_cast(malloc(dpfkey.nodes_in_interval(from, to) * sizeof(uint8_t))); // __evalinterval(dpfkey, from, to, output, tt); // if (!t) free(tt); // } // dpf::evalinterval // template // inline void evalfull(const dpf_key & dpfkey, leaf_t * output, uint8_t * t) // { // uint8_t * tt = t ? t : reinterpret_cast(malloc(dpfkey.nodes_at_leaf_layer() * sizeof(uint8_t))); // __evalinterval(dpfkey, 0, dpfkey.nitems-1, output, tt); // if (!t) free(tt); // } // dpf::evalfull // template // inline leaf_t eval(const dpf_key & dpfkey, const size_t input) // { // auto prgkey = dpfkey.prgkey; // auto root = dpfkey.root; // auto depth = dpfkey.depth(); // auto nbits = dpfkey.input_bits(); // node_t S = root; // uint8_t T = get_lsb(root, 0b01); // for (size_t layer = 0; layer < depth; ++layer) // { // auto & cw = dpfkey.cw[layer]; // const uint8_t nextbit = (input >> (nbits-layer-1)) & 1; // traverse(prgkey, S, nextbit, get_lsb(cw, nextbit ? 0b10 : 0b01), cw, T, S, T); // } // std::array::nodes_per_leaf> final; // finalize(prgkey, dpfkey.finalizer, &final, &S, 1, &T); // if constexpr(dpfkey.is_packed) // { // auto S_ = reinterpret_cast(&final); // return std::forward(getword(*S_, input % dpfkey.leaves_per_node)); // } // else // { // auto ret = reinterpret_cast(&final); // return *ret; // } // // } // dpf::eval // } // namespace dpf //#endif // DPFPP_DPF_H