#ifndef __OREXPAND_HPP__ #define __OREXPAND_HPP__ #include #include #include "utils.hpp" #include "oasm_lib.h" // Notation: for an array A, let A[lo..hi] denote the subarray of // A that is A[lo], A[lo+1], ..., A[hi-1]. That is, it is inclusive of // lo and exclusive of hi. The length of the subarray is hi-lo. If // lo < mid < hi, then A[lo..mid] and A[mid..hi] are a partition of // A[lo..hi] into two non-empty non-overlapping pieces whose union is // exactly A[lo..hi]. // This file implements oblivious recursive expansion (ORExpand), which // is basically ORCompact (the TightCompact function in // TightCompaction_v2.tcc) run backwards (doing the OSwaps in reverse // order). // Throughout this file, "buf" is the data buffer, composed of blocks // that are each block_size bytes long. block_size must be a size // supported by the corresponding OSwap_Style. "dest" is an array of // destinations. The idea is that if the input to ORExpand has, say, // dest[7] = 19, then the block at index 7 in the input buf will end up // at index 19 in the output buf (treating buf as an array of // block_size-byte blocks). // These functions do _not_ implement arbitrary permutations, however // (see the WaksmanNetwork.* files for that). They will only expand a // contiguous range of blocks into a possibly non-contiguous range, // inserting padding blocks in the intervening spaces, and preserving // the order of the non-padding blocks. The contiguous range may (in // some cases; see below) start at some offset z, rather than at the // beginning of the (sub)array, however, in which case the contiguous // range may "wrap around" to the beginning of the (sub)array. // Note that A[lo+((z+k) mod (hi-lo))] is element k (starting from 0) of a // contiguous range in A[lo..hi], starting at offset z, and wrapping // around if necessary. // We denote block i of buf (buf[i]) as a padding block by setting // dest[i] = 0xffffffff. // Therefore, suppose we are working on the subarray buf[lo..hi] (that // is, we only perform operations on buf[lo..hi] and on the // corresponding dest[lo..hi], and all elements of dest[lo..hi] are // either themselves in the range lo..hi (inclusive of lo and exclusive // of hi) or are 0xffffffff). If there are r non-padding blocks (and so // hi-lo-r padding blocks), and the offset is z, then on input, we must // have: // // lo <= dest[lo+(z mod (hi-lo))] // < dest[lo+((z+1) mod (hi-lo))] // < dest[lo+((z+2) mod (hi-lo))] // ... // < dest[lo+((z+(r-1)) mod (hi-lo))] // < hi // // and the other elements of dest[lo..hi] are 0xffffffff. // Note that while the offset may cause the _indices_ to wrap around, // the dest _values_ have no offset, and cannot wrap around. // The restriction on z is that if hi-lo is _not_ a power of 2, then z // must be 0, and indeed lo must be 0 (and so the contiguous range of // non-padding blocks must start at the beginning of the array). // Note that z does not need to be passed explicitly; it is implied by // the contents of dest. In fact, the algorithm does not even need to // compute it explicitly. // The values passed are not private, but the _contents_ of the buf and // dest arrays are. template void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size, uint32_t lo, uint32_t hi); // A convenience wrapper for the initial call. Here, N is the length of // the input arrays (in block_size-byte blocks for buf, and in 32-bit // words for dest). z must be 0, so the non-padding elements of dest // must be at the beginning. template void ORExpand(unsigned char *buf, uint32_t *dest, size_t block_size, uint32_t N) { ORExpand(buf, dest, block_size, 0, N); } // Multithreaded versions of ORExpand and its convenience wrapper template void ORExpand_parallel(unsigned char *buf, uint32_t *dest, size_t block_size, uint32_t lo, uint32_t hi, threadid_t nthreads); template void ORExpand_parallel(unsigned char *buf, uint32_t *dest, size_t block_size, uint32_t N, threadid_t nthreads) { ORExpand_parallel(buf, dest, block_size, 0, N, nthreads); } // #define PROFILE_OREXPAND // #define TEST_OREXPAND #include "ORExpand.tcc" #ifdef TEST_OREXPAND void test_ORExpand(); void test_ORExpand_parallel(threadid_t nthreads); #endif #endif