rdpf.tcc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. // Templated method implementations for rdpf.hpp
  2. // Create a StreamEval object that will start its output at index start.
  3. // It will wrap around to 0 when it hits 2^depth. If use_expansion
  4. // is true, then if the DPF has been expanded, just output values
  5. // from that. If use_expansion=false or if the DPF has not been
  6. // expanded, compute the values on the fly. If xor_offset is non-zero,
  7. // then the outputs are actually
  8. // DPF(start XOR xor_offset)
  9. // DPF((start+1) XOR xor_offset)
  10. // DPF((start+2) XOR xor_offset)
  11. // etc.
  12. template <typename T>
  13. StreamEval<T>::StreamEval(const T &rdpf, address_t start,
  14. address_t xor_offset, size_t &aes_ops,
  15. bool use_expansion) : rdpf(rdpf), aes_ops(aes_ops),
  16. use_expansion(use_expansion), counter_xor_offset(xor_offset)
  17. {
  18. depth = rdpf.depth();
  19. // Prevent overflow of 1<<depth
  20. if (depth < ADDRESS_MAX_BITS) {
  21. indexmask = (address_t(1)<<depth)-1;
  22. } else {
  23. indexmask = ~0;
  24. }
  25. start &= indexmask;
  26. counter_xor_offset &= indexmask;
  27. // Record that we haven't actually output the leaf for index start
  28. // itself yet
  29. nextindex = start;
  30. if (use_expansion && rdpf.has_expansion()) {
  31. // We just need to keep the counter, not compute anything
  32. return;
  33. }
  34. path.resize(depth);
  35. pathindex = start;
  36. path[0] = rdpf.get_seed();
  37. for (nbits_t i=1;i<depth;++i) {
  38. bool dir = !!(pathindex & (address_t(1)<<(depth-i)));
  39. bool xor_offset_bit =
  40. !!(counter_xor_offset & (address_t(1)<<(depth-i)));
  41. path[i] = rdpf.descend(path[i-1], i-1,
  42. dir ^ xor_offset_bit, aes_ops);
  43. }
  44. }
  45. template <typename T>
  46. typename T::node StreamEval<T>::next()
  47. {
  48. if (use_expansion && rdpf.has_expansion()) {
  49. // Just use the precomputed values
  50. typename T::node leaf =
  51. rdpf.get_expansion(nextindex ^ counter_xor_offset);
  52. nextindex = (nextindex + 1) & indexmask;
  53. return leaf;
  54. }
  55. // Invariant: in the first call to next(), nextindex = pathindex.
  56. // Otherwise, nextindex = pathindex+1.
  57. // Get the XOR of nextindex and pathindex, and strip the low bit.
  58. // If nextindex and pathindex are equal, or pathindex is even
  59. // and nextindex is the consecutive odd number, index_xor will be 0,
  60. // indicating that we don't have to update the path, but just
  61. // compute the appropriate leaf given by the low bit of nextindex.
  62. //
  63. // Otherwise, say for example pathindex is 010010111 and nextindex
  64. // is 010011000. Then their XOR is 000001111, and stripping the low
  65. // bit yields 000001110, so how_many_1_bits will be 3.
  66. // That indicates (typically) that path[depth-3] was a left child,
  67. // and now we need to change it to a right child by descending right
  68. // from path[depth-4], and then filling the path after that with
  69. // left children.
  70. //
  71. // When we wrap around, however, index_xor will be 111111110 (after
  72. // we strip the low bit), and how_many_1_bits will be depth-1, but
  73. // the new top child (of the root seed) we have to compute will be a
  74. // left, not a right, child.
  75. uint64_t index_xor = (nextindex ^ pathindex) & ~1;
  76. nbits_t how_many_1_bits = __builtin_popcount(index_xor);
  77. if (how_many_1_bits > 0) {
  78. // This will almost always be 1, unless we've just wrapped
  79. // around from the right subtree back to the left, in which case
  80. // it will be 0.
  81. bool top_changed_bit =
  82. !!(nextindex & (address_t(1) << how_many_1_bits));
  83. bool xor_offset_bit =
  84. !!(counter_xor_offset & (address_t(1) << how_many_1_bits));
  85. path[depth-how_many_1_bits] =
  86. rdpf.descend(path[depth-how_many_1_bits-1],
  87. depth-how_many_1_bits-1,
  88. top_changed_bit ^ xor_offset_bit, aes_ops);
  89. for (nbits_t i = depth-how_many_1_bits; i < depth-1; ++i) {
  90. bool xor_offset_bit =
  91. !!(counter_xor_offset & (address_t(1) << (depth-i-1)));
  92. path[i+1] = rdpf.descend(path[i], i, xor_offset_bit, aes_ops);
  93. }
  94. }
  95. bool xor_offset_bit = counter_xor_offset & 1;
  96. typename T::node leaf = rdpf.descend(path[depth-1], depth-1,
  97. (nextindex & 1) ^ xor_offset_bit, aes_ops);
  98. pathindex = nextindex;
  99. nextindex = (nextindex + 1) & indexmask;
  100. return leaf;
  101. }
  102. // Run the parallel evaluator. The type V is the type of the
  103. // accumulator; init should be the "zero" value of the accumulator.
  104. // The type W (process) is a lambda type with the signature
  105. // (int, address_t, const T::node &) -> V
  106. // which will be called like this for each i from 0 to num_evals-1,
  107. // across num_thread threads:
  108. // value_i = process(t, i, DPF((start+i) XOR xor_offset))
  109. // t is the thread number (0 <= t < num_threads).
  110. // The resulting num_evals values will be combined using V's +=
  111. // operator, first accumulating the values within each thread
  112. // (starting with the init value), and then accumulating the totals
  113. // from each thread together (again starting with the init value):
  114. //
  115. // total = init
  116. // for each thread t:
  117. // accum_t = init
  118. // for each accum_i generated by thread t:
  119. // accum_t += value_i
  120. // total += accum_t
  121. template <typename T> template <typename V, typename W>
  122. inline V ParallelEval<T>::reduce(V init, W process)
  123. {
  124. size_t thread_aes_ops[num_threads];
  125. V accums[num_threads];
  126. boost::asio::thread_pool pool(num_threads);
  127. address_t threadstart = start;
  128. address_t threadchunk = num_evals / num_threads;
  129. address_t threadextra = num_evals % num_threads;
  130. nbits_t depth = rdpf.depth();
  131. address_t indexmask = (depth < ADDRESS_MAX_BITS ?
  132. ((address_t(1)<<depth)-1) : ~0);
  133. for (int thread_num = 0; thread_num < num_threads; ++thread_num) {
  134. address_t threadsize = threadchunk + (address_t(thread_num) < threadextra);
  135. boost::asio::post(pool,
  136. [this, &init, &thread_aes_ops, &accums, &process,
  137. thread_num, threadstart, threadsize, indexmask] {
  138. size_t local_aes_ops = 0;
  139. auto ev = StreamEval(rdpf, (start+threadstart)&indexmask,
  140. xor_offset, local_aes_ops);
  141. V accum = init;
  142. for (address_t x=0;x<threadsize;++x) {
  143. typename T::node leaf = ev.next();
  144. accum += process(thread_num,
  145. (threadstart+x)&indexmask, leaf);
  146. }
  147. accums[thread_num] = accum;
  148. thread_aes_ops[thread_num] = local_aes_ops;
  149. });
  150. threadstart = (threadstart + threadsize) & indexmask;
  151. }
  152. pool.join();
  153. V total = init;
  154. for (int thread_num = 0; thread_num < num_threads; ++thread_num) {
  155. total += accums[thread_num];
  156. aes_ops += thread_aes_ops[thread_num];
  157. }
  158. return total;
  159. }
  160. // I/O for RDPFs
  161. template <typename T>
  162. T& operator>>(T &is, RDPF &rdpf)
  163. {
  164. is.read((char *)&rdpf.seed, sizeof(rdpf.seed));
  165. rdpf.whichhalf = get_lsb(rdpf.seed);
  166. uint8_t depth;
  167. // Add 64 to depth to indicate an expanded RDPF
  168. is.read((char *)&depth, sizeof(depth));
  169. bool read_expanded = false;
  170. if (depth > 64) {
  171. read_expanded = true;
  172. depth -= 64;
  173. }
  174. assert(depth <= ADDRESS_MAX_BITS);
  175. rdpf.cw.clear();
  176. for (uint8_t i=0; i<depth; ++i) {
  177. DPFnode cw;
  178. is.read((char *)&cw, sizeof(cw));
  179. rdpf.cw.push_back(cw);
  180. }
  181. if (read_expanded) {
  182. rdpf.expansion.resize(1<<depth);
  183. is.read((char *)rdpf.expansion.data(),
  184. sizeof(rdpf.expansion[0])<<depth);
  185. }
  186. value_t cfbits = 0;
  187. is.read((char *)&cfbits, BITBYTES(depth));
  188. rdpf.cfbits = cfbits;
  189. is.read((char *)&rdpf.unit_sum_inverse, sizeof(rdpf.unit_sum_inverse));
  190. is.read((char *)&rdpf.scaled_sum, sizeof(rdpf.scaled_sum));
  191. is.read((char *)&rdpf.scaled_xor, sizeof(rdpf.scaled_xor));
  192. return is;
  193. }
  194. // Write the DPF to the output stream. If expanded=true, then include
  195. // the expansion _if_ the DPF is itself already expanded. You can use
  196. // this to write DPFs to files.
  197. template <typename T>
  198. T& write_maybe_expanded(T &os, const RDPF &rdpf,
  199. bool expanded = true)
  200. {
  201. os.write((const char *)&rdpf.seed, sizeof(rdpf.seed));
  202. uint8_t depth = rdpf.cw.size();
  203. assert(depth <= ADDRESS_MAX_BITS);
  204. // If we're writing an expansion, add 64 to depth
  205. uint8_t expanded_depth = depth;
  206. bool write_expansion = false;
  207. if (expanded && rdpf.expansion.size() == (size_t(1)<<depth)) {
  208. write_expansion = true;
  209. expanded_depth += 64;
  210. }
  211. os.write((const char *)&expanded_depth, sizeof(expanded_depth));
  212. for (uint8_t i=0; i<depth; ++i) {
  213. os.write((const char *)&rdpf.cw[i], sizeof(rdpf.cw[i]));
  214. }
  215. if (write_expansion) {
  216. os.write((const char *)rdpf.expansion.data(),
  217. sizeof(rdpf.expansion[0])<<depth);
  218. }
  219. os.write((const char *)&rdpf.cfbits, BITBYTES(depth));
  220. os.write((const char *)&rdpf.unit_sum_inverse, sizeof(rdpf.unit_sum_inverse));
  221. os.write((const char *)&rdpf.scaled_sum, sizeof(rdpf.scaled_sum));
  222. os.write((const char *)&rdpf.scaled_xor, sizeof(rdpf.scaled_xor));
  223. return os;
  224. }
  225. // The ordinary << version never writes the expansion, since this is
  226. // what we use to send DPFs over the network.
  227. template <typename T>
  228. T& operator<<(T &os, const RDPF &rdpf)
  229. {
  230. return write_maybe_expanded(os, rdpf, false);
  231. }
  232. // I/O for RDPF Triples
  233. // We never write RDPFTriples over the network, so always write
  234. // the DPF expansions if they're available.
  235. template <typename T>
  236. T& operator<<(T &os, const RDPFTriple &rdpftrip)
  237. {
  238. write_maybe_expanded(os, rdpftrip.dpf[0], true);
  239. write_maybe_expanded(os, rdpftrip.dpf[1], true);
  240. write_maybe_expanded(os, rdpftrip.dpf[2], true);
  241. nbits_t depth = rdpftrip.dpf[0].depth();
  242. os.write((const char *)&rdpftrip.as_target.ashare, BITBYTES(depth));
  243. os.write((const char *)&rdpftrip.xs_target.xshare, BITBYTES(depth));
  244. return os;
  245. }
  246. template <typename T>
  247. T& operator>>(T &is, RDPFTriple &rdpftrip)
  248. {
  249. is >> rdpftrip.dpf[0] >> rdpftrip.dpf[1] >> rdpftrip.dpf[2];
  250. nbits_t depth = rdpftrip.dpf[0].depth();
  251. rdpftrip.as_target.ashare = 0;
  252. is.read((char *)&rdpftrip.as_target.ashare, BITBYTES(depth));
  253. rdpftrip.xs_target.xshare = 0;
  254. is.read((char *)&rdpftrip.xs_target.xshare, BITBYTES(depth));
  255. return is;
  256. }
  257. // I/O for RDPF Pairs
  258. // We never write RDPFPairs over the network, so always write
  259. // the DPF expansions if they're available.
  260. template <typename T>
  261. T& operator<<(T &os, const RDPFPair &rdpfpair)
  262. {
  263. write_maybe_expanded(os, rdpfpair.dpf[0], true);
  264. write_maybe_expanded(os, rdpfpair.dpf[1], true);
  265. return os;
  266. }
  267. template <typename T>
  268. T& operator>>(T &is, RDPFPair &rdpfpair)
  269. {
  270. is >> rdpfpair.dpf[0] >> rdpfpair.dpf[1];
  271. return is;
  272. }