#ifndef __RDPF_HPP__
#define __RDPF_HPP__

#include <array>
#include <vector>
#include <iostream>

#include "mpcio.hpp"
#include "coroutine.hpp"
#include "types.hpp"
#include "bitutils.hpp"
#include "dpf.hpp"

// DPFs for oblivious random accesses to memory.  See dpf.hpp for the
// differences between the different kinds of DPFs.

template <nbits_t WIDTH>
struct RDPF : public DPF {
    template <typename T>
    using W = std::array<T, WIDTH>;
    using RegASW = W<RegAS>;
    using RegXSW = W<RegXS>;

    // The number of 128-bit leaf node entries you need to get 1 unit
    // value and WIDTH scaled values (each is 64 bits)
    static const nbits_t LWIDTH = 1 + (WIDTH/2);
    using LeafNode = std::array<DPFnode,LWIDTH>;
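    // Illustrative arithmetic only: WIDTH=1 gives LWIDTH=1, so a
    // LeafNode is a single 128-bit entry whose two 64-bit words hold
    // the unit value and one scaled value; WIDTH=5 gives LWIDTH=3,
    // whose six 64-bit words hold the unit value and five scaled
    // values.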

    // Information for leaf levels of the RDPF.  Normal RDPFs only have
    // one leaf level (at the bottom), but incremental RDPFs have a leaf
    // level for each level of the DPF.
    struct LeafInfo {
        // The WIDTH correction words for this leaf level
        std::array<DPFnode,WIDTH> leaf_cw;
        // The amount we have to scale the low words of the leaf values by
        // to get additive shares of a unit vector
        value_t unit_sum_inverse;
        // Additive share of the scaling values M_as such that the high words
        // of the WIDTH leaf values for P0 and P1 add to M_as * e_{target}
        std::array<RegAS,WIDTH> scaled_sum;
        // XOR share of the scaling values M_xs such that the high words
        // of the WIDTH leaf values for P0 and P1 XOR to M_xs * e_{target}
        std::array<RegXS,WIDTH> scaled_xor;

        LeafInfo() : unit_sum_inverse(0) {}
    };

    // The LeafInfo for each leaf level.  Normal RDPFs only have one
    // leaf level, so this will be a vector of length 1.  Incremental
    // RDPFs will have one entry for each level in the DPF.  The entry
    // corresponding to level i of the DPF (of total depth d) is
    // li[d-i].
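    // For example (hypothetical sizes): an incremental RDPF of depth
    // d=3 has li.size() == 3; its bottom-level leaves (level 3) use
    // li[0], level 2 uses li[1], and level 1 uses li[2].  A normal
    // (non-incremental) RDPF of any depth just uses li[0].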
    std::vector<LeafInfo> li;

    // The leaf correction flag bits for the WIDTH leaf words at each
    // leaf level.  The bit for leaf word j of level i (for an
    // incremental DPF of total depth d) is leaf_cfbits[j] & (1<<(d-i)).
    // For a normal (not incremental) RDPF, d-i is always 0, so only
    // the low bit of each of these WIDTH words gets used.
    std::array<value_t,WIDTH> leaf_cfbits;

    // If we're saving the expansion, put it here
    std::vector<LeafNode> expansion;

    RDPF() {}

    // Construct a DPF with the given (XOR-shared) target location, and
    // of the given depth, to be used for random-access memory reads and
    // writes.  The DPF is constructed collaboratively by P0 and P1,
    // with the server P2 helping by providing correlated randomness,
    // such as SelectTriples.
    //
    // Cost:
    // (2 DPFnode + 2 bytes)*depth + 1 word communication in
    // 2*depth + 1 messages
    // (2 DPFnode + 1 byte)*depth communication from P2 to each party
    // 2^{depth+1}-2 local AES operations for P0,P1
    // 0 local AES operations for P2
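    //
    // As a concrete (illustrative) instance of the costs above,
    // assuming 16-byte DPFnodes and 8-byte words: a depth-20
    // construction uses (2*16 + 2)*20 + 8 = 688 bytes of communication
    // in 2*20 + 1 = 41 messages, (2*16 + 1)*20 = 660 bytes from P2 to
    // each party, and 2^21 - 2 = 2,097,150 local AES operations for
    // each of P0 and P1.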
    RDPF(MPCTIO &tio, yield_t &yield,
        RegXS target, nbits_t depth, bool save_expansion = false);

    // Do we have a precomputed expansion?
    inline bool has_expansion() const { return expansion.size() > 0; }

    // Get an element of the expansion
    inline LeafNode get_expansion(address_t index) const {
        return expansion[index];
    }

    // The depth
    inline nbits_t depth() const { return cw.size(); }

    // Get the leaf node for the given input
    //
    // Cost: depth AES operations
    LeafNode leaf(address_t input, size_t &aes_ops) const;

    // Expand the DPF if it's not already expanded
    void expand(size_t &aes_ops);

    // Descend from a node at depth parentdepth to one of its leaf children
    // whichchild = 0: left child
    // whichchild = 1: right child
    //
    // Cost: 1 AES operation
    inline LeafNode descend_to_leaf(const DPFnode &parent,
        nbits_t parentdepth, bit_t whichchild, size_t &aes_ops) const;

    // Get the bit-shared unit vector entry from the leaf node
    inline RegBS unit_bs(LeafNode leaf) const {
        RegBS b;
        b.bshare = get_lsb(leaf[0]);
        return b;
    }

    // Get the additive-shared unit vector entry from the leaf node
    inline RegAS unit_as(LeafNode leaf) const {
        RegAS a;
        value_t lowword = value_t(_mm_cvtsi128_si64x(leaf[0]));
        if (whichhalf == 1) {
            lowword = -lowword;
        }
        a.ashare = lowword * li[0].unit_sum_inverse;
        return a;
    }

    // Get the XOR-shared scaled vector entry from the leaf node
    inline RegXSW scaled_xs(LeafNode leaf) const {
        RegXSW x;
        nbits_t j = 0;
        value_t highword =
            value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leaf[0],8)));
        x[j++].xshare = highword;
        for (nbits_t i=1;i<LWIDTH;++i) {
            value_t lowword =
                value_t(_mm_cvtsi128_si64x(leaf[i]));
            value_t highword =
                value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leaf[i],8)));
            x[j++].xshare = lowword;
            if (j < WIDTH) {
                x[j++].xshare = highword;
            }
        }
        return x;
    }

    // Get the additive-shared scaled vector entry from the leaf node
    inline RegASW scaled_as(LeafNode leaf) const {
        RegASW a;
        nbits_t j = 0;
        value_t highword =
            value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leaf[0],8)));
        if (whichhalf == 1) {
            highword = -highword;
        }
        a[j++].ashare = highword;
        for (nbits_t i=1;i<LWIDTH;++i) {
            value_t lowword =
                value_t(_mm_cvtsi128_si64x(leaf[i]));
            value_t highword =
                value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leaf[i],8)));
            if (whichhalf == 1) {
                lowword = -lowword;
                highword = -highword;
            }
            a[j++].ashare = lowword;
            if (j < WIDTH) {
                a[j++].ashare = highword;
            }
        }
        return a;
    }
};

// Computational peers will generate triples of RDPFs with the _same_
// random target for use in Duoram.  They will each hold a share of the
// target (neither knowing the complete target index).  They will each
// give one of the DPFs (not a matching pair) to the server, but not the
// shares of the target index.  So computational peers will hold an
// RDPFTriple (which includes both an additive and an XOR share of the
// target index), while the server will hold an RDPFPair (which does
// not).
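//
// A minimal usage sketch on the computational-peer side (assuming an
// MPCTIO `tio`, a yield_t `yield`, and a chosen `depth` are already
// set up, and using WIDTH=1; the indices 0 and 2 below are arbitrary):
//
//     RDPFTriple<1> trip(tio, yield, depth);
//     // ... use trip for a Duoram access ...
//     // Two of the three DPFs can later be moved into an RDPFPair
//     // (keeping two and dropping one), using the RDPFPair
//     // constructor declared below:
//     RDPFPair<1> pair(std::move(trip), 0, 2);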

template <nbits_t WIDTH>
struct RDPFTriple {
    template <typename T>
    using Triple = std::tuple<T, T, T>;
    template <typename T>
    using WTriple = Triple<typename RDPF<WIDTH>::template W<T>>;

    // The type of node triples
    using node = Triple<DPFnode>;
    using LeafNode = Triple<typename RDPF<WIDTH>::LeafNode>;
    using RegASWT = WTriple<RegAS>;
    using RegXSWT = WTriple<RegXS>;

    RegAS as_target;
    RegXS xs_target;
    RDPF<WIDTH> dpf[3];

    // The depth
    inline nbits_t depth() const { return dpf[0].depth(); }

    // The seed
    inline node get_seed() const {
        return std::make_tuple(dpf[0].get_seed(), dpf[1].get_seed(),
            dpf[2].get_seed());
    }

    // Do we have a precomputed expansion?
    inline bool has_expansion() const {
        return dpf[0].expansion.size() > 0;
    }

    // Get an element of the expansion
    inline LeafNode get_expansion(address_t index) const {
        return std::make_tuple(dpf[0].get_expansion(index),
            dpf[1].get_expansion(index), dpf[2].get_expansion(index));
    }

    RDPFTriple() {}

    // Construct three RDPFs of the given depth all with the same
    // randomly generated target index.
    RDPFTriple(MPCTIO &tio, yield_t &yield,
        nbits_t depth, bool save_expansion = false);

    // Descend the three RDPFs in lock step
    node descend(const node &parent, nbits_t parentdepth,
        bit_t whichchild, size_t &aes_ops) const;

    // Descend the three RDPFs in lock step to a leaf node
    LeafNode descend_to_leaf(const node &parent, nbits_t parentdepth,
        bit_t whichchild, size_t &aes_ops) const;

    // Overloaded versions of functions to get DPF components and
    // outputs so that the appropriate one can be selected with a
    // parameter

    inline void get_target(RegAS &target) const { target = as_target; }
    inline void get_target(RegXS &target) const { target = xs_target; }

    // Additive share of the scaling value M_as such that the high words
    // of the leaf values for P0 and P1 add to M_as * e_{target}
    inline void scaled_value(RegASWT &v) const {
        std::get<0>(v) = dpf[0].li[0].scaled_sum;
        std::get<1>(v) = dpf[1].li[0].scaled_sum;
        std::get<2>(v) = dpf[2].li[0].scaled_sum;
    }

    // XOR share of the scaling value M_xs such that the high words
    // of the leaf values for P0 and P1 XOR to M_xs * e_{target}
    inline void scaled_value(RegXSWT &v) const {
        std::get<0>(v) = dpf[0].li[0].scaled_xor;
        std::get<1>(v) = dpf[1].li[0].scaled_xor;
        std::get<2>(v) = dpf[2].li[0].scaled_xor;
    }

    // Get the additive-shared unit vector entry from the leaf node
    inline void unit(std::tuple<RegAS,RegAS,RegAS> &u, LeafNode leaf) const {
        std::get<0>(u) = dpf[0].unit_as(std::get<0>(leaf));
        std::get<1>(u) = dpf[1].unit_as(std::get<1>(leaf));
        std::get<2>(u) = dpf[2].unit_as(std::get<2>(leaf));
    }

    // Get the bit-shared unit vector entry from the leaf node
    inline void unit(std::tuple<RegXS,RegXS,RegXS> &u, LeafNode leaf) const {
        std::get<0>(u) = dpf[0].unit_bs(std::get<0>(leaf));
        std::get<1>(u) = dpf[1].unit_bs(std::get<1>(leaf));
        std::get<2>(u) = dpf[2].unit_bs(std::get<2>(leaf));
    }

    // For any more complex entry type, that type will handle the conversion
    // for each DPF
    template <typename T>
    inline void unit(std::tuple<T,T,T> &u, LeafNode leaf) const {
        std::get<0>(u).unit(dpf[0], std::get<0>(leaf));
        std::get<1>(u).unit(dpf[1], std::get<1>(leaf));
        std::get<2>(u).unit(dpf[2], std::get<2>(leaf));
    }

    // Get the additive-shared scaled vector entry from the leaf node
    inline void scaled(RegASWT &s, LeafNode leaf) const {
        std::get<0>(s) = dpf[0].scaled_as(std::get<0>(leaf));
        std::get<1>(s) = dpf[1].scaled_as(std::get<1>(leaf));
        std::get<2>(s) = dpf[2].scaled_as(std::get<2>(leaf));
    }

    // Get the XOR-shared scaled vector entry from the leaf node
    inline void scaled(RegXSWT &s, LeafNode leaf) const {
        std::get<0>(s) = dpf[0].scaled_xs(std::get<0>(leaf));
        std::get<1>(s) = dpf[1].scaled_xs(std::get<1>(leaf));
        std::get<2>(s) = dpf[2].scaled_xs(std::get<2>(leaf));
    }
};

template <nbits_t WIDTH>
struct RDPFPair {
    template <typename T>
    using Pair = std::tuple<T, T>;
    template <typename T>
    using WPair = Pair<typename RDPF<WIDTH>::template W<T>>;

    // The type of node pairs
    using node = Pair<DPFnode>;
    using LeafNode = Pair<typename RDPF<WIDTH>::LeafNode>;
    using RegASWP = WPair<RegAS>;
    using RegXSWP = WPair<RegXS>;

    RDPF<WIDTH> dpf[2];

    RDPFPair() {}

    // Create an RDPFPair from an RDPFTriple, keeping two of the RDPFs
    // and dropping one.  This _moves_ the dpfs from the triple to the
    // pair, so the triple will no longer be valid after using this.
    // which0 and which1 indicate which of the dpfs to keep.
    RDPFPair(RDPFTriple<WIDTH> &&trip, int which0, int which1) {
        dpf[0] = std::move(trip.dpf[which0]);
        dpf[1] = std::move(trip.dpf[which1]);
    }

    // The depth
    inline nbits_t depth() const { return dpf[0].depth(); }

    // The seed
    inline node get_seed() const {
        return std::make_tuple(dpf[0].get_seed(), dpf[1].get_seed());
    }

    // Do we have a precomputed expansion?
    inline bool has_expansion() const {
        return dpf[0].expansion.size() > 0;
    }

    // Get an element of the expansion
    inline LeafNode get_expansion(address_t index) const {
        return std::make_tuple(dpf[0].get_expansion(index),
            dpf[1].get_expansion(index));
    }

    // Descend the two RDPFs in lock step
    node descend(const node &parent, nbits_t parentdepth,
        bit_t whichchild, size_t &aes_ops) const;

    // Descend the two RDPFs in lock step to a leaf node
    LeafNode descend_to_leaf(const node &parent, nbits_t parentdepth,
        bit_t whichchild, size_t &aes_ops) const;

    // Overloaded versions of functions to get DPF components and
    // outputs so that the appropriate one can be selected with a
    // parameter

    // Additive share of the scaling value M_as such that the high words
    // of the leaf values for P0 and P1 add to M_as * e_{target}
    inline void scaled_value(RegASWP &v) const {
        std::get<0>(v) = dpf[0].li[0].scaled_sum;
        std::get<1>(v) = dpf[1].li[0].scaled_sum;
    }

    // XOR share of the scaling value M_xs such that the high words
    // of the leaf values for P0 and P1 XOR to M_xs * e_{target}
    inline void scaled_value(RegXSWP &v) const {
        std::get<0>(v) = dpf[0].li[0].scaled_xor;
        std::get<1>(v) = dpf[1].li[0].scaled_xor;
    }

    // Get the additive-shared unit vector entry from the leaf node
    inline void unit(std::tuple<RegAS,RegAS> &u, LeafNode leaf) const {
        std::get<0>(u) = dpf[0].unit_as(std::get<0>(leaf));
        std::get<1>(u) = dpf[1].unit_as(std::get<1>(leaf));
    }

    // Get the bit-shared unit vector entry from the leaf node
    inline void unit(std::tuple<RegXS,RegXS> &u, LeafNode leaf) const {
        std::get<0>(u) = dpf[0].unit_bs(std::get<0>(leaf));
        std::get<1>(u) = dpf[1].unit_bs(std::get<1>(leaf));
    }

    // For any more complex entry type, that type will handle the conversion
    // for each DPF
    template <typename T>
    inline void unit(std::tuple<T,T> &u, LeafNode leaf) const {
        std::get<0>(u).unit(dpf[0], std::get<0>(leaf));
        std::get<1>(u).unit(dpf[1], std::get<1>(leaf));
    }

    // Get the additive-shared scaled vector entry from the leaf node
    inline void scaled(RegASWP &s, LeafNode leaf) const {
        std::get<0>(s) = dpf[0].scaled_as(std::get<0>(leaf));
        std::get<1>(s) = dpf[1].scaled_as(std::get<1>(leaf));
    }

    // Get the XOR-shared scaled vector entry from the leaf node
    inline void scaled(RegXSWP &s, LeafNode leaf) const {
        std::get<0>(s) = dpf[0].scaled_xs(std::get<0>(leaf));
        std::get<1>(s) = dpf[1].scaled_xs(std::get<1>(leaf));
    }
};

// Streaming evaluation, to avoid taking up enough memory to store
// an entire evaluation.  T can be RDPF, RDPFPair, or RDPFTriple.
template <typename T>
class StreamEval {
    const T &rdpf;
    size_t &aes_ops;
    bool use_expansion;
    nbits_t depth;
    address_t counter_xor_offset;
    address_t indexmask;
    address_t pathindex;
    address_t nextindex;
    std::vector<typename T::node> path;
public:
    // Create a StreamEval object that will start its output at index
    // start.  It will wrap around to 0 when it hits 2^depth.  If
    // use_expansion is true, then if the DPF has been expanded, just
    // output values from that.  If use_expansion=false or if the DPF
    // has not been expanded, compute the values on the fly.  If
    // xor_offset is non-zero, then the outputs are actually
    // DPF(start XOR xor_offset)
    // DPF((start+1) XOR xor_offset)
    // DPF((start+2) XOR xor_offset)
    // etc.
    StreamEval(const T &rdpf, address_t start,
        address_t xor_offset, size_t &aes_ops,
        bool use_expansion = true);

    // Get the next value (or tuple of values) from the evaluator
    typename T::LeafNode next();
};
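
// A minimal usage sketch of StreamEval (assuming a WIDTH-1 RDPF `dpf`
// has already been constructed, and that we want all 2^depth leaves in
// order, starting at index 0 with no XOR offset):
//
//     size_t aes_ops = 0;
//     StreamEval<RDPF<1>> ev(dpf, 0, 0, aes_ops);
//     for (address_t i = 0; i < (address_t(1) << dpf.depth()); ++i) {
//         RDPF<1>::LeafNode leaf = ev.next();
//         // ... process leaf, e.g. with dpf.unit_as(leaf) ...
//     }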

// Parallel evaluation.  This class launches a number of threads each
// running a StreamEval to evaluate a chunk of the RDPF (or RDPFPair or
// RDPFTriple), and accumulates the results within each chunk, and then
// accumulates all the chunks together.  T can be RDPF, RDPFPair, or
// RDPFTriple.
template <typename T>
struct ParallelEval {
    const T &rdpf;
    address_t start;
    address_t xor_offset;
    address_t num_evals;
    int num_threads;
    size_t &aes_ops;

    // Create a Parallel evaluator that will evaluate the given rdpf at
    // DPF(start XOR xor_offset)
    // DPF((start+1) XOR xor_offset)
    // DPF((start+2) XOR xor_offset)
    // ...
    // DPF((start+num_evals-1) XOR xor_offset)
    // where all indices are taken mod 2^depth, and accumulate the
    // results into a single answer.
    ParallelEval(const T &rdpf, address_t start,
        address_t xor_offset, address_t num_evals,
        int num_threads, size_t &aes_ops) :
        rdpf(rdpf), start(start), xor_offset(xor_offset),
        num_evals(num_evals), num_threads(num_threads),
        aes_ops(aes_ops) {}

    // Run the parallel evaluator.  The type V is the type of the
    // accumulator; init should be the "zero" value of the accumulator.
    // The type W (process) is a lambda type with the signature
    // (int, address_t, const T::LeafNode &) -> V
    // which will be called like this for each i from 0 to num_evals-1,
    // across num_threads threads:
    // value_i = process(t, i, DPF((start+i) XOR xor_offset))
    // t is the thread number (0 <= t < num_threads).
    // The resulting num_evals values will be combined using V's +=
    // operator, first accumulating the values within each thread
    // (starting with the init value), and then accumulating the totals
    // from each thread together (again starting with the init value):
    //
    // total = init
    // for each thread t:
    //     accum_t = init
    //     for each value_i generated by thread t:
    //         accum_t += value_i
    //     total += accum_t
    template <typename V, typename W>
    inline V reduce(V init, W process);
};
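
// A minimal usage sketch of ParallelEval::reduce (assuming a WIDTH-1
// RDPF `dpf` and a chosen thread count `num_threads`): sum the
// additive-shared unit-vector entries over the whole domain, which
// should reconstruct to 1 when P0's and P1's totals are combined.
//
//     size_t aes_ops = 0;
//     address_t num_evals = address_t(1) << dpf.depth();
//     ParallelEval<RDPF<1>> pe(dpf, 0, 0, num_evals, num_threads, aes_ops);
//     RegAS zero;
//     RegAS total = pe.reduce(zero, [&dpf](int t, address_t i,
//             const RDPF<1>::LeafNode &leaf) {
//         return dpf.unit_as(leaf);
//     });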

#include "rdpf.tcc"

#endif