// rdpf.tcc

// Templated method implementations for rdpf.hpp

// Create a StreamEval object that will start its output at index start.
// It will wrap around to 0 when it hits 2^depth. If use_expansion
// is true, then if the DPF has been expanded, just output values
// from that. If use_expansion=false or if the DPF has not been
// expanded, compute the values on the fly. If xor_offset is non-zero,
// then the outputs are actually
// DPF(start XOR xor_offset)
// DPF((start+1) XOR xor_offset)
// DPF((start+2) XOR xor_offset)
// etc.
template <typename T>
StreamEval<T>::StreamEval(const T &rdpf, address_t start,
    address_t xor_offset, size_t &aes_ops,
    bool use_expansion) : rdpf(rdpf), aes_ops(aes_ops),
    use_expansion(use_expansion), counter_xor_offset(xor_offset)
{
    depth = rdpf.depth();
    // Prevent overflow of 1<<depth
    if (depth < ADDRESS_MAX_BITS) {
        indexmask = (address_t(1)<<depth)-1;
    } else {
        indexmask = ~0;
    }
    start &= indexmask;
    counter_xor_offset &= indexmask;
    // Record that we haven't actually output the leaf for index start
    // itself yet
    nextindex = start;
    if (use_expansion && rdpf.has_expansion()) {
        // We just need to keep the counter, not compute anything
        return;
    }
    path.resize(depth);
    pathindex = start;
    path[0] = rdpf.get_seed();
    for (nbits_t i=1;i<depth;++i) {
        bool dir = !!(pathindex & (address_t(1)<<(depth-i)));
        bool xor_offset_bit =
            !!(counter_xor_offset & (address_t(1)<<(depth-i)));
        path[i] = rdpf.descend(path[i-1], i-1,
            dir ^ xor_offset_bit, aes_ops);
    }
}

template <typename T>
typename T::node StreamEval<T>::next()
{
    if (use_expansion && rdpf.has_expansion()) {
        // Just use the precomputed values
        typename T::node leaf =
            rdpf.get_expansion(nextindex ^ counter_xor_offset);
        nextindex = (nextindex + 1) & indexmask;
        return leaf;
    }
    // Invariant: in the first call to next(), nextindex = pathindex.
    // Otherwise, nextindex = pathindex+1.
    // Get the XOR of nextindex and pathindex, and strip the low bit.
    // If nextindex and pathindex are equal, or pathindex is even
    // and nextindex is the consecutive odd number, index_xor will be 0,
    // indicating that we don't have to update the path, but just
    // compute the appropriate leaf given by the low bit of nextindex.
    //
    // Otherwise, say for example pathindex is 010010111 and nextindex
    // is 010011000. Then their XOR is 000001111, and stripping the low
    // bit yields 000001110, so how_many_1_bits will be 3.
    // That indicates (typically) that path[depth-3] was a left child,
    // and now we need to change it to a right child by descending right
    // from path[depth-4], and then filling the path after that with
    // left children.
    //
    // When we wrap around, however, index_xor will be 111111110 (after
    // we strip the low bit), and how_many_1_bits will be depth-1, but
    // the new top child (of the root seed) we have to compute will be a
    // left, not a right, child.
    uint64_t index_xor = (nextindex ^ pathindex) & ~1;
    // index_xor is 64 bits wide, so use the long long popcount
    nbits_t how_many_1_bits = __builtin_popcountll(index_xor);
    if (how_many_1_bits > 0) {
        // This will almost always be 1, unless we've just wrapped
        // around from the right subtree back to the left, in which case
        // it will be 0.
        bool top_changed_bit =
            !!(nextindex & (address_t(1) << how_many_1_bits));
        bool xor_offset_bit =
            !!(counter_xor_offset & (address_t(1) << how_many_1_bits));
        path[depth-how_many_1_bits] =
            rdpf.descend(path[depth-how_many_1_bits-1],
                depth-how_many_1_bits-1,
                top_changed_bit ^ xor_offset_bit, aes_ops);
        for (nbits_t i = depth-how_many_1_bits; i < depth-1; ++i) {
            bool xor_offset_bit =
                !!(counter_xor_offset & (address_t(1) << (depth-i-1)));
            path[i+1] = rdpf.descend(path[i], i, xor_offset_bit, aes_ops);
        }
    }
    bool xor_offset_bit = counter_xor_offset & 1;
    typename T::node leaf = rdpf.descend(path[depth-1], depth-1,
        (nextindex & 1) ^ xor_offset_bit, aes_ops);
    pathindex = nextindex;
    nextindex = (nextindex + 1) & indexmask;
    return leaf;
}
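
// A minimal usage sketch of StreamEval (not part of the original
// interface): collect the first n leaves of a DPF of type T, starting
// at index start and wrapping at 2^depth. This assumes use_expansion
// has a default value in rdpf.hpp (as the four-argument construction
// in ParallelEval::reduce() below suggests) and that <vector> is
// already included there; the helper name and parameters are
// hypothetical.
template <typename T>
inline std::vector<typename T::node> stream_first_n(const T &rdpf,
    address_t start, size_t n, size_t &aes_ops)
{
    std::vector<typename T::node> leaves;
    leaves.reserve(n);
    StreamEval<T> ev(rdpf, start, 0, aes_ops);
    for (size_t i = 0; i < n; ++i) {
        // The i-th call to next() yields DPF((start+i) mod 2^depth)
        leaves.push_back(ev.next());
    }
    return leaves;
}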

// Run the parallel evaluator. The type V is the type of the
// accumulator; init should be the "zero" value of the accumulator.
// The type W (process) is a lambda type with the signature
// (int, address_t, const T::node &) -> V
// which will be called like this for each i from 0 to num_evals-1,
// across num_threads threads:
// value_i = process(t, i, DPF((start+i) XOR xor_offset))
// t is the thread number (0 <= t < num_threads).
// The resulting num_evals values will be combined using V's +=
// operator, first accumulating the values within each thread
// (starting with the init value), and then accumulating the totals
// from each thread together (again starting with the init value):
//
// total = init
// for each thread t:
//     accum_t = init
//     for each value_i generated by thread t:
//         accum_t += value_i
//     total += accum_t
template <typename T> template <typename V, typename W>
inline V ParallelEval<T>::reduce(V init, W process)
{
    size_t thread_aes_ops[num_threads];
    V accums[num_threads];
    boost::asio::thread_pool pool(num_threads);
    address_t threadstart = 0;
    address_t threadchunk = num_evals / num_threads;
    address_t threadextra = num_evals % num_threads;
    nbits_t depth = rdpf.depth();
    address_t indexmask = (depth < ADDRESS_MAX_BITS ?
        ((address_t(1)<<depth)-1) : ~0);
    for (int thread_num = 0; thread_num < num_threads; ++thread_num) {
        address_t threadsize = threadchunk + (address_t(thread_num) < threadextra);
        boost::asio::post(pool,
            [this, &init, &thread_aes_ops, &accums, &process,
                thread_num, threadstart, threadsize, indexmask] {
                size_t local_aes_ops = 0;
                auto ev = StreamEval(rdpf, (start+threadstart)&indexmask,
                    xor_offset, local_aes_ops);
                V accum = init;
                for (address_t x=0;x<threadsize;++x) {
                    typename T::node leaf = ev.next();
                    accum += process(thread_num,
                        (threadstart+x)&indexmask, leaf);
                }
                accums[thread_num] = accum;
                thread_aes_ops[thread_num] = local_aes_ops;
            });
        threadstart = (threadstart + threadsize) & indexmask;
    }
    pool.join();
    V total = init;
    for (int thread_num = 0; thread_num < num_threads; ++thread_num) {
        total += accums[thread_num];
        aes_ops += thread_aes_ops[thread_num];
    }
    return total;
}
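
// A minimal usage sketch of reduce() (not part of the original
// interface): count the leaves whose low bit is set, across all
// 2^depth leaves, using four threads. This assumes depth <
// ADDRESS_MAX_BITS, and that ParallelEval is constructed as
// ParallelEval(rdpf, start, xor_offset, num_evals, num_threads,
// aes_ops), matching the members used in reduce() above; the helper
// name is hypothetical.
inline size_t count_lsb_set(const RDPF &dpf, size_t &aes_ops)
{
    address_t num_evals = address_t(1) << dpf.depth();
    ParallelEval<RDPF> pe(dpf, 0, 0, num_evals, 4, aes_ops);
    return pe.reduce(size_t(0),
        [](int, address_t, const RDPF::node &leaf) {
            // get_lsb is the same helper operator>> below uses on seeds
            return size_t(get_lsb(leaf));
        });
}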

// Additive share of the target index
template <>
inline RegAS RDPFTriple::target<RegAS>() const {
    return as_target;
}

// XOR share of the target index
template <>
inline RegXS RDPFTriple::target<RegXS>() const {
    return xs_target;
}

// Additive share of the scaling value M_as such that the high words
// of the leaf values for P0 and P1 add to M_as * e_{target}
template <>
inline std::tuple<RegAS,RegAS,RegAS> RDPFTriple::scaled_value<RegAS>() const {
    return std::make_tuple(dpf[0].scaled_sum, dpf[1].scaled_sum,
        dpf[2].scaled_sum);
}

// XOR share of the scaling value M_xs such that the high words
// of the leaf values for P0 and P1 XOR to M_xs * e_{target}
template <>
inline std::tuple<RegXS,RegXS,RegXS> RDPFTriple::scaled_value<RegXS>() const {
    return std::make_tuple(dpf[0].scaled_xor, dpf[1].scaled_xor,
        dpf[2].scaled_xor);
}

// Get the bit-shared unit vector entry from the leaf node
template <>
inline std::tuple<RegXS,RegXS,RegXS> RDPFTriple::unit<RegXS>(node leaf) const {
    return std::make_tuple(
        dpf[0].unit_bs(std::get<0>(leaf)),
        dpf[1].unit_bs(std::get<1>(leaf)),
        dpf[2].unit_bs(std::get<2>(leaf)));
}

// Get the additive-shared unit vector entry from the leaf node
template <>
inline std::tuple<RegAS,RegAS,RegAS> RDPFTriple::unit<RegAS>(node leaf) const {
    return std::make_tuple(
        dpf[0].unit_as(std::get<0>(leaf)),
        dpf[1].unit_as(std::get<1>(leaf)),
        dpf[2].unit_as(std::get<2>(leaf)));
}

// For any more complex entry type, that type will handle the conversion
// for each DPF
template <typename T>
inline std::tuple<T,T,T> RDPFTriple::unit(node leaf) const {
    T v0, v1, v2;
    v0.unit(dpf[0], std::get<0>(leaf));
    v1.unit(dpf[1], std::get<1>(leaf));
    v2.unit(dpf[2], std::get<2>(leaf));
    return std::make_tuple(v0,v1,v2);
}
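
// A minimal sketch of the contract the generic unit<T>() above
// assumes: the entry type provides a unit(dpf, leaf) member that
// converts its own share from the given leaf. The struct and its
// field are hypothetical, for illustration only.
struct ExampleEntry {
    RegAS field;
    void unit(const RDPF &dpf, DPFnode leaf) {
        // Here we just take the additive-shared unit vector entry,
        // as RDPFTriple::unit<RegAS> does above
        field = dpf.unit_as(leaf);
    }
};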

// Get the XOR-shared scaled vector entry from the leaf node
template <>
inline std::tuple<RegXS,RegXS,RegXS> RDPFTriple::scaled<RegXS>(node leaf) const {
    return std::make_tuple(
        dpf[0].scaled_xs(std::get<0>(leaf)),
        dpf[1].scaled_xs(std::get<1>(leaf)),
        dpf[2].scaled_xs(std::get<2>(leaf)));
}

// Get the additive-shared scaled vector entry from the leaf node
template <>
inline std::tuple<RegAS,RegAS,RegAS> RDPFTriple::scaled<RegAS>(node leaf) const {
    return std::make_tuple(
        dpf[0].scaled_as(std::get<0>(leaf)),
        dpf[1].scaled_as(std::get<1>(leaf)),
        dpf[2].scaled_as(std::get<2>(leaf)));
}

// Additive share of the scaling value M_as such that the high words
// of the leaf values for P0 and P1 add to M_as * e_{target}
template <>
inline std::tuple<RegAS,RegAS> RDPFPair::scaled_value<RegAS>() const {
    return std::make_tuple(dpf[0].scaled_sum, dpf[1].scaled_sum);
}

// XOR share of the scaling value M_xs such that the high words
// of the leaf values for P0 and P1 XOR to M_xs * e_{target}
template <>
inline std::tuple<RegXS,RegXS> RDPFPair::scaled_value<RegXS>() const {
    return std::make_tuple(dpf[0].scaled_xor, dpf[1].scaled_xor);
}

// Get the bit-shared unit vector entry from the leaf node
template <>
inline std::tuple<RegXS,RegXS> RDPFPair::unit<RegXS>(node leaf) const {
    return std::make_tuple(
        dpf[0].unit_bs(std::get<0>(leaf)),
        dpf[1].unit_bs(std::get<1>(leaf)));
}

// Get the additive-shared unit vector entry from the leaf node
template <>
inline std::tuple<RegAS,RegAS> RDPFPair::unit<RegAS>(node leaf) const {
    return std::make_tuple(
        dpf[0].unit_as(std::get<0>(leaf)),
        dpf[1].unit_as(std::get<1>(leaf)));
}

// For any more complex entry type, that type will handle the conversion
// for each DPF
template <typename T>
inline std::tuple<T,T> RDPFPair::unit(node leaf) const {
    T v0, v1;
    v0.unit(dpf[0], std::get<0>(leaf));
    v1.unit(dpf[1], std::get<1>(leaf));
    return std::make_tuple(v0,v1);
}

// Get the XOR-shared scaled vector entry from the leaf node
template <>
inline std::tuple<RegXS,RegXS> RDPFPair::scaled<RegXS>(node leaf) const {
    return std::make_tuple(
        dpf[0].scaled_xs(std::get<0>(leaf)),
        dpf[1].scaled_xs(std::get<1>(leaf)));
}

// Get the additive-shared scaled vector entry from the leaf node
template <>
inline std::tuple<RegAS,RegAS> RDPFPair::scaled<RegAS>(node leaf) const {
    return std::make_tuple(
        dpf[0].scaled_as(std::get<0>(leaf)),
        dpf[1].scaled_as(std::get<1>(leaf)));
}
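
// A minimal sketch combining StreamEval with the accessors above
// (not part of the original interface): accumulate the
// additive-shared scaled vector entries of both DPFs in a pair over
// its first n leaves. This assumes RDPFPair exposes the depth() /
// descend() / has_expansion() interface StreamEval needs, and that
// RegAS default-constructs to a zero share; the helper name is
// hypothetical.
inline std::tuple<RegAS,RegAS> sum_scaled_pair(const RDPFPair &dp,
    address_t n, size_t &aes_ops)
{
    StreamEval<RDPFPair> ev(dp, 0, 0, aes_ops);
    RegAS sum0, sum1;
    for (address_t i = 0; i < n; ++i) {
        RDPFPair::node leaf = ev.next();
        auto [v0, v1] = dp.scaled<RegAS>(leaf);
        sum0 += v0;
        sum1 += v1;
    }
    return std::make_tuple(sum0, sum1);
}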

// I/O for RDPFs
template <typename T>
T& operator>>(T &is, RDPF &rdpf)
{
    is.read((char *)&rdpf.seed, sizeof(rdpf.seed));
    rdpf.whichhalf = get_lsb(rdpf.seed);
    uint8_t depth;
    // Add 64 to depth to indicate an expanded RDPF
    is.read((char *)&depth, sizeof(depth));
    bool read_expanded = false;
    if (depth > 64) {
        read_expanded = true;
        depth -= 64;
    }
    assert(depth <= ADDRESS_MAX_BITS);
    rdpf.cw.clear();
    for (uint8_t i=0; i<depth; ++i) {
        DPFnode cw;
        is.read((char *)&cw, sizeof(cw));
        rdpf.cw.push_back(cw);
    }
    if (read_expanded) {
        rdpf.expansion.resize(size_t(1)<<depth);
        is.read((char *)rdpf.expansion.data(),
            sizeof(rdpf.expansion[0])<<depth);
    }
    value_t cfbits = 0;
    is.read((char *)&cfbits, BITBYTES(depth));
    rdpf.cfbits = cfbits;
    is.read((char *)&rdpf.unit_sum_inverse, sizeof(rdpf.unit_sum_inverse));
    is.read((char *)&rdpf.scaled_sum, sizeof(rdpf.scaled_sum));
    is.read((char *)&rdpf.scaled_xor, sizeof(rdpf.scaled_xor));
    return is;
}

// Write the DPF to the output stream. If expanded=true, then include
// the expansion _if_ the DPF is itself already expanded. You can use
// this to write DPFs to files.
template <typename T>
T& write_maybe_expanded(T &os, const RDPF &rdpf,
    bool expanded = true)
{
    os.write((const char *)&rdpf.seed, sizeof(rdpf.seed));
    uint8_t depth = rdpf.cw.size();
    assert(depth <= ADDRESS_MAX_BITS);
    // If we're writing an expansion, add 64 to depth
    uint8_t expanded_depth = depth;
    bool write_expansion = false;
    if (expanded && rdpf.expansion.size() == (size_t(1)<<depth)) {
        write_expansion = true;
        expanded_depth += 64;
    }
    os.write((const char *)&expanded_depth, sizeof(expanded_depth));
    for (uint8_t i=0; i<depth; ++i) {
        os.write((const char *)&rdpf.cw[i], sizeof(rdpf.cw[i]));
    }
    if (write_expansion) {
        os.write((const char *)rdpf.expansion.data(),
            sizeof(rdpf.expansion[0])<<depth);
    }
    os.write((const char *)&rdpf.cfbits, BITBYTES(depth));
    os.write((const char *)&rdpf.unit_sum_inverse, sizeof(rdpf.unit_sum_inverse));
    os.write((const char *)&rdpf.scaled_sum, sizeof(rdpf.scaled_sum));
    os.write((const char *)&rdpf.scaled_xor, sizeof(rdpf.scaled_xor));
    return os;
}

// The ordinary << version never writes the expansion, since this is
// what we use to send DPFs over the network.
template <typename T>
T& operator<<(T &os, const RDPF &rdpf)
{
    return write_maybe_expanded(os, rdpf, false);
}
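
// A minimal sketch of file persistence (not part of the original
// interface), as the comment on write_maybe_expanded suggests.
// Assumes <fstream> has been included by the translation unit and
// that RDPF is default-constructible; the helper names are
// hypothetical.
inline void save_rdpf(const char *filename, const RDPF &rdpf)
{
    std::ofstream os(filename, std::ios::binary);
    // Keep the expansion (and the +64 depth marker) if present
    write_maybe_expanded(os, rdpf, true);
}

inline RDPF load_rdpf(const char *filename)
{
    RDPF rdpf;
    std::ifstream is(filename, std::ios::binary);
    // operator>> detects the depth marker and reads the expansion back
    is >> rdpf;
    return rdpf;
}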

// I/O for RDPF Triples
// We never write RDPFTriples over the network, so always write
// the DPF expansions if they're available.
template <typename T>
T& operator<<(T &os, const RDPFTriple &rdpftrip)
{
    write_maybe_expanded(os, rdpftrip.dpf[0], true);
    write_maybe_expanded(os, rdpftrip.dpf[1], true);
    write_maybe_expanded(os, rdpftrip.dpf[2], true);
    nbits_t depth = rdpftrip.dpf[0].depth();
    os.write((const char *)&rdpftrip.as_target.ashare, BITBYTES(depth));
    os.write((const char *)&rdpftrip.xs_target.xshare, BITBYTES(depth));
    return os;
}

template <typename T>
T& operator>>(T &is, RDPFTriple &rdpftrip)
{
    is >> rdpftrip.dpf[0] >> rdpftrip.dpf[1] >> rdpftrip.dpf[2];
    nbits_t depth = rdpftrip.dpf[0].depth();
    rdpftrip.as_target.ashare = 0;
    is.read((char *)&rdpftrip.as_target.ashare, BITBYTES(depth));
    rdpftrip.xs_target.xshare = 0;
    is.read((char *)&rdpftrip.xs_target.xshare, BITBYTES(depth));
    return is;
}

// I/O for RDPF Pairs
// We never write RDPFPairs over the network, so always write
// the DPF expansions if they're available.
template <typename T>
T& operator<<(T &os, const RDPFPair &rdpfpair)
{
    write_maybe_expanded(os, rdpfpair.dpf[0], true);
    write_maybe_expanded(os, rdpfpair.dpf[1], true);
    return os;
}

template <typename T>
T& operator>>(T &is, RDPFPair &rdpfpair)
{
    is >> rdpfpair.dpf[0] >> rdpfpair.dpf[1];
    return is;
}
  387. }