#include // arc4random_buf #include "rdpf.hpp" #include "bitutils.hpp" #undef RDPF_MTGEN_TIMING_1 #ifdef RDPF_MTGEN_TIMING_1 // Timing tests for multithreaded generation of RDPFs // nthreads = 0 to not launch threads at all // run for num_iters iterations, output the number of millisections // total for all of the iterations // // Results: roughly 50 µs to launch the thread pool with 1 thread, and // roughly 30 additional µs for each additional thread. Each iteration // of the inner loop takes about 4 to 5 ns. This works out to around // level 19 where it starts being worth it to multithread, and you // should use at most sqrt(2^{level}/6000) threads. static void mtgen_timetest_1(nbits_t level, int nthreads, size_t num_iters, const DPFnode *curlevel, DPFnode *nextlevel, size_t &aes_ops) { if (num_iters == 0) { num_iters = 1; } size_t prev_aes_ops = aes_ops; DPFnode L = _mm_setzero_si128(); DPFnode R = _mm_setzero_si128(); // The tweak causes us to compute something slightly different every // iteration of the loop, so that the compiler doesn't notice we're // doing the same thing num_iters times and optimize it away DPFnode tweak = _mm_setzero_si128(); auto start = boost::chrono::steady_clock::now(); for(size_t iter=0;iter(elapsed) << " " << (aes_ops-prev_aes_ops) << " AES\n"; dump_node(L); dump_node(R); } #endif