123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- #ifndef __OREXPAND_HPP__
- #define __OREXPAND_HPP__
- #include <cstddef>
- #include <cstdint>
- #include "utils.hpp"
- #include "oasm_lib.h"
- // Notation: for an array A, let A[lo..hi] denote the subarray of
- // A that is A[lo], A[lo+1], ..., A[hi-1]. That is, it is inclusive of
- // lo and exclusive of hi. The length of the subarray is hi-lo. If
- // lo < mid < hi, then A[lo..mid] and A[mid..hi] are a partition of
- // A[lo..hi] into two non-empty non-overlapping pieces whose union is
- // exactly A[lo..hi].
- // This file implements oblivious recursive expansion (ORExpand), which
- // is basically ORCompact (the TightCompact function in
- // TightCompaction_v2.tcc) run backwards (doing the OSwaps in reverse
- // order).
- // Throughout this file, "buf" is the data buffer, composed of blocks
- // that are each block_size bytes long. block_size must be a size
- // supported by the corresponding OSwap_Style. "dest" is an array of
- // destinations. The idea is that if the input to ORExpand has, say,
- // dest[7] = 19, then the block at index 7 in the input buf will end up
- // at index 19 in the output buf (treating buf as an array of
- // block_size-byte blocks).
- // These functions do _not_ implement arbitrary permutations, however
- // (see the WaksmanNetwork.* files for that). They will only expand a
- // contiguous range of blocks into a possibly non-contiguous range,
- // inserting padding blocks in the intervening spaces, and preserving
- // the order of the non-padding blocks. The contiguous range may (in
- // some cases; see below) start at some offset z, rather than at the
- // beginning of the (sub)array, however, in which case the contiguous
- // range may "wrap around" to the beginning of the (sub)array.
- // Note that A[lo+((z+k) mod (hi-lo))] is element k (starting from 0) of a
- // contiguous range in A[lo..hi], starting at offset z, and wrapping
- // around if necessary.
- // We denote block i of buf (buf[i]) as a padding block by setting
- // dest[i] = 0xffffffff.
- // Therefore, suppose we are working on the subarray buf[lo..hi] (that
- // is, we only perform operations on buf[lo..hi] and on the
- // corresponding dest[lo..hi], and all elements of dest[lo..hi] are
- // either themselves in the range lo..hi (inclusive of lo and exclusive
- // of hi) or are 0xffffffff). If there are r non-padding blocks (and so
- // hi-lo-r padding blocks), and the offset is z, then on input, we must
- // have:
- //
- // lo <= dest[lo+(z mod (hi-lo))]
- // < dest[lo+((z+1) mod (hi-lo))]
- // < dest[lo+((z+2) mod (hi-lo))]
- // ...
- // < dest[lo+((z+(r-1)) mod (hi-lo))]
- // < hi
- //
- // and the other elements of dest[lo..hi] are 0xffffffff.
- // Note that while the offset may cause the _indices_ to wrap around,
- // the dest _values_ have no offset, and cannot wrap around.
- // The restriction on z is that if hi-lo is _not_ a power of 2, then z
- // must be 0, and indeed lo must be 0 (and so the contiguous range of
- // non-padding blocks must start at the beginning of the array).
- // Note that z does not need to be passed explicitly; it is implied by
- // the contents of dest. In fact, the algorithm does not even need to
- // compute it explicitly.
- // The values passed are not private, but the _contents_ of the buf and
- // dest arrays are.
- template <OSwap_Style oswap_style>
- void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size,
- uint32_t lo, uint32_t hi);
- // A convenience wrapper for the initial call. Here, N is the length of
- // the input arrays (in block_size-byte blocks for buf, and in 32-bit
- // words for dest). z must be 0, so the non-padding elements of dest
- // must be at the beginning.
- template <OSwap_Style oswap_style>
- void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size,
- uint32_t N) {
- ORExpand<oswap_style>(buf, dest, block_size, 0, N);
- }
- // Multithreaded versions of ORExpand and its convenience wrapper
- template <OSwap_Style oswap_style>
- void ORExpand_parallel(unsigned char *buf, uint32_t *dest,
- size_t block_size, uint32_t lo, uint32_t hi, threadid_t nthreads);
- template <OSwap_Style oswap_style>
- void ORExpand_parallel(unsigned char *buf, uint32_t *dest,
- size_t block_size, uint32_t N, threadid_t nthreads) {
- ORExpand_parallel<oswap_style>(buf, dest, block_size, 0, N, nthreads);
- }
- // #define PROFILE_OREXPAND
- // #define TEST_OREXPAND
- #include "ORExpand.tcc"
- #ifdef TEST_OREXPAND
- void test_ORExpand();
- void test_ORExpand_parallel(threadid_t nthreads);
- #endif
- #endif
|