iang
/
teems


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
							#ifndef __SORT_HPP__
#define __SORT_HPP__

#include <functional>

#include "WaksmanNetwork.hpp"

// We have Nr elements to sort, at the beginning of an array of
// allocated size Na items.  Nr and Na are _not_ secret.  The strategy
// is to get the smallest (precomputed) WaksmanNetwork (on a random
// permutation) and WNEvalPlan, each of size Nw, where Nr <= Nw <= Na.
// Nw is also not secret.  Then mark the Nw-Nr items following the given
// elements as padding, use the WaksmanNetwork and WNEvalPlan to shuffle
// the Nw items, then non-obliviously sort the non-padding items.  The
// sort itself is done by making an index array holding just the sort
// keys and original indices into the array, but only of the non-padding
// items.  Sort the index array, and call back a std::function with the
// sorted index array and the shuffled array so that it can read out the
// elements in sorted order.  This function does not have to be
// oblivious to what elements it is reading, because they've already
// been obliviously shuffled.

// Precompute a WaksmanNetwork of size N for a random permutation.  This
// call does not itself use threads, but may be called from a background
// thread.  These are consumed as they are used, so you need to keep
// making more.  Returns the number of WaksmanNetworks available at that
// size.
size_t sort_precompute(uint32_t N);

// Return a vector of the precomputed sizes we've used since we were
// last asked
std::vector<uint32_t> sort_get_used();

// Precompute a WNEvalPlan for a given size and number of threads.
// These are not consumed as they are used, so you only need to call
// this once for each (size,nthreads) pair you need.  The precomputation
// itself only uses a single thread, but also may be called from a
// background thread.
void sort_precompute_evalplan(uint32_t N, threadid_t nthreads);

// Shuffle Nr items at the beginning of an allocated array of Na items
// using up to nthreads threads.  The items to shuffle are byte arrays
// of size msg_size.  Return Nw, the size of the Waksman network we
// used, which must satisfy Nr <= Nw <= Na.
uint32_t shuffle_mtobliv(threadid_t nthreads, uint8_t* items, uint16_t msg_size,
    uint32_t Nr, uint32_t Na);

// Sort Nr items at the beginning of an allocated array of Na items
// using up to nthreads threads.  The items to sort are byte arrays of
// size msg_size.  The keys are of type T.  T must have set_key<T> and
// compare_keys<T> defined.  The items will be shuffled in-place, and a
// sorted array of keys will be passed to the provided callback
// function.
template<typename T>
void sort_mtobliv(threadid_t nthreads, uint8_t* items, uint16_t msg_size,
    uint32_t Nr, uint32_t Na,
    // the arguments to the callback are items, the sorted keys, and
    // the number of non-padding items
    std::function<void(const uint8_t*, const T*, uint32_t Nr)>);

// As above, but also pass an Nr*msg_size-byte buffer outbuf to put
// the sorted values into, instead of passing a callback.  This calls
// the above function, then copies the data in sorted order into outbuf.
// Note: the outbuf buffer cannot overlap the items buffer.
template<typename T>
void sort_mtobliv(threadid_t nthreads, uint8_t* items, uint16_t msg_size,
    uint32_t Nr, uint32_t Na, uint8_t *outbuf);

// As above, but the first Nr msg_size-byte entries in the items array
// will end up with the sorted values.  Note: if Nr < Na, entries beyond
// Nr may also change, but you should not even look at those values!
// This calls the above function with a temporary buffer, then copies
// that buffer back into the items array, so it's less efficient, both
// in memory and CPU, than the above functions.
template<typename T>
void sort_mtobliv(threadid_t nthreads, uint8_t* items, uint16_t msg_size,
    uint32_t Nr, uint32_t Na);

template<typename T>
inline void set_key(T* key, const uint8_t *item, uint32_t index);

template<typename T>
int compare_keys(const void* a, const void* b);

// The different kinds of keys we sort on

// The 32-bit userid (which is the 10-bit node id and the 22-bit
// per-node userid)
struct UidKey {
    uint64_t uid_index;

    inline uint32_t index() const { return (uint32_t) uid_index; }
};

// The above and also the priority (for public routing)
struct UidPriorityKey {
    uint64_t uid_priority;
    uint32_t idx;

    inline uint32_t index() const { return idx; }
};

// Just the nodeid (not the per-node userid) and the priority (for
// public routing)
struct NidPriorityKey {
    uint64_t nid_priority;
    uint32_t idx;

    inline uint32_t index() const { return idx; }
};

#include "sort.tcc"

#endif