ORExpand.hpp 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #ifndef __OREXPAND_HPP__
  2. #define __OREXPAND_HPP__
  3. #include <cstddef>
  4. #include <cstdint>
  5. #include "utils.hpp"
  6. #include "oasm_lib.h"
  7. // Notation: for an array A, let A[lo..hi] denote the subarray of
  8. // A that is A[lo], A[lo+1], ..., A[hi-1]. That is, it is inclusive of
  9. // lo and exclusive of hi. The length of the subarray is hi-lo. If
  10. // lo < mid < hi, then A[lo..mid] and A[mid..hi] are a partition of
  11. // A[lo..hi] into two non-empty non-overlapping pieces whose union is
  12. // exactly A[lo..hi].
  13. // This file implements oblivious recursive expansion (ORExpand), which
  14. // is basically ORCompact (the TightCompact function in
  15. // TightCompaction_v2.tcc) run backwards (doing the OSwaps in reverse
  16. // order).
  17. // Throughout this file, "buf" is the data buffer, composed of blocks
  18. // that are each block_size bytes long. block_size must be a size
  19. // supported by the corresponding OSwap_Style. "dest" is an array of
  20. // destinations. The idea is that if the input to ORExpand has, say,
  21. // dest[7] = 19, then the block at index 7 in the input buf will end up
  22. // at index 19 in the output buf (treating buf as an array of
  23. // block_size-byte blocks).
  24. // These functions do _not_ implement arbitrary permutations, however
  25. // (see the WaksmanNetwork.* files for that). They will only expand a
  26. // contiguous range of blocks into a possibly non-contiguous range,
  27. // inserting padding blocks in the intervening spaces, and preserving
  28. // the order of the non-padding blocks. The contiguous range may (in
  29. // some cases; see below) start at some offset z, rather than at the
  30. // beginning of the (sub)array, however, in which case the contiguous
  31. // range may "wrap around" to the beginning of the (sub)array.
  32. // Note that A[lo+((z+k) mod (hi-lo))] is element k (starting from 0) of a
  33. // contiguous range in A[lo..hi], starting at offset z, and wrapping
  34. // around if necessary.
  35. // We denote block i of buf (buf[i]) as a padding block by setting
  36. // dest[i] = 0xffffffff.
  37. // Therefore, suppose we are working on the subarray buf[lo..hi] (that
  38. // is, we only perform operations on buf[lo..hi] and on the
  39. // corresponding dest[lo..hi], and all elements of dest[lo..hi] are
  40. // either themselves in the range lo..hi (inclusive of lo and exclusive
  41. // of hi) or are 0xffffffff). If there are r non-padding blocks (and so
  42. // hi-lo-r padding blocks), and the offset is z, then on input, we must
  43. // have:
  44. //
  45. // lo <= dest[lo+(z mod (hi-lo))]
  46. // < dest[lo+((z+1) mod (hi-lo))]
  47. // < dest[lo+((z+2) mod (hi-lo))]
  48. // ...
  49. // < dest[lo+((z+(r-1)) mod (hi-lo))]
  50. // < hi
  51. //
  52. // and the other elements of dest[lo..hi] are 0xffffffff.
  53. // Note that while the offset may cause the _indices_ to wrap around,
  54. // the dest _values_ have no offset, and cannot wrap around.
  55. // The restriction on z is that if hi-lo is _not_ a power of 2, then z
  56. // must be 0, and indeed lo must be 0 (and so the contiguous range of
  57. // non-padding blocks must start at the beginning of the array).
  58. // Note that z does not need to be passed explicitly; it is implied by
  59. // the contents of dest. In fact, the algorithm does not even need to
  60. // compute it explicitly.
  61. // The values passed are not private, but the _contents_ of the buf and
  62. // dest arrays are.
  63. template <OSwap_Style oswap_style>
  64. void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size,
  65. uint32_t lo, uint32_t hi);
  66. // A convenience wrapper for the initial call. Here, N is the length of
  67. // the input arrays (in block_size-byte blocks for buf, and in 32-bit
  68. // words for dest). z must be 0, so the non-padding elements of dest
  69. // must be at the beginning.
  70. template <OSwap_Style oswap_style>
  71. void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size,
  72. uint32_t N) {
  73. ORExpand<oswap_style>(buf, dest, block_size, 0, N);
  74. }
  75. // Multithreaded versions of ORExpand and its convenience wrapper
  76. template <OSwap_Style oswap_style>
  77. void ORExpand_parallel(unsigned char *buf, uint32_t *dest,
  78. size_t block_size, uint32_t lo, uint32_t hi, threadid_t nthreads);
  79. template <OSwap_Style oswap_style>
  80. void ORExpand_parallel(unsigned char *buf, uint32_t *dest,
  81. size_t block_size, uint32_t N, threadid_t nthreads) {
  82. ORExpand_parallel<oswap_style>(buf, dest, block_size, 0, N, nthreads);
  83. }
  84. // #define PROFILE_OREXPAND
  85. // #define TEST_OREXPAND
  86. #include "ORExpand.tcc"
  87. #ifdef TEST_OREXPAND
  88. void test_ORExpand();
  89. void test_ORExpand_parallel(threadid_t nthreads);
  90. #endif
  91. #endif