
3P preprocessing

Ian Goldberg 1 year ago
Commit
8b1aebc54e

+ 0 - 3
Docker/cleanup.sh

@@ -3,6 +3,3 @@ docker stop duoram_p0 &
 docker stop duoram_p1 &
 docker stop duoram_p2 &
 wait
-docker rm duoram_p0
-docker rm duoram_p1
-docker rm duoram_p2

+ 14 - 3
Docker/run-docker.sh

@@ -1,6 +1,17 @@
 #!/bin/bash
 
-docker run -d --name duoram_p2 -t duoram
-docker run -d --name duoram_p1 -t duoram
-docker run -d --name duoram_p0 -t duoram
+if [ "$DUORAM_STORAGE" != "" ]; then
+    mkdir -p ${DUORAM_STORAGE}/party{0,1,2}
+    S0ARG="-v ${DUORAM_STORAGE}/party0:/root/duoram/duoram-online/preprocflags"
+    S1ARG="-v ${DUORAM_STORAGE}/party1:/root/duoram/duoram-online/preprocflags"
+    S2ARG="-v ${DUORAM_STORAGE}/party2:/root/duoram/duoram-online/preprocflags"
+fi
+
+# The SYS_NICE capability allows you to use numactl to pin processes to
+# NUMA nodes and/or individual cores
+
+docker run --cap-add SYS_NICE -d --rm --name duoram_p0 $S0ARG -t duoram
+docker run --cap-add SYS_NICE -d --rm --name duoram_p1 $S1ARG -t duoram
+docker run --cap-add SYS_NICE -d --rm --name duoram_p2 $S2ARG -t duoram
+
 echo "All dockers launched"
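
A minimal usage sketch for the new options (the storage path and core ranges are illustrative; DUORAM_STORAGE and SYS_NICE come from this diff, and DUORAM_NUMA_P0/P1/P2 are the variables consumed by run-experiment.sh below):

    # Persist each party's preprocessing flags on the host
    export DUORAM_STORAGE=/tmp/duoram-storage
    ./run-docker.sh
    # With SYS_NICE granted, numactl can pin each party's processes to cores
    export DUORAM_NUMA_P0="numactl -C 0-7"
    export DUORAM_NUMA_P1="numactl -C 8-15"
    export DUORAM_NUMA_P2="numactl -C 16-23"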

+ 22 - 22
Docker/run-experiment.sh

@@ -12,8 +12,7 @@ phase=online
 mode=read
 size=20
 iters=128
-modeval=0
-ramsize=128
+ramgb=128
 if [ "$1" != "" ]; then
     mode="$1"
 fi
@@ -26,28 +25,25 @@ fi
 if [ "$4" != "" ]; then
     phase="$4"
 fi
-
 if [ "$5" != "" ]; then
     duoramtype="$5"
 fi
 if [ "$6" != "" ]; then
-    ramsize="$6"
-fi
-if [ "$mode" = "read" ]; then
-    modeval="0"
+    ramgb="$6"
 fi
 
-if [ "$mode" = "write" ]; then
-    modeval="0"
-fi
+if [ "$duoramtype" = "3P" -a "$phase" = "preproc" ]; then
+    numdpfs=$iters
+    if [ "$mode" = "read" -o "$mode" = "write" ]; then
+        numdpfs=$((3*iters))
+    fi
 
-if [ "$mode" = "readwrite" ]; then
-    modeval="1"
+    if [ "$mode" = "readwrite" ]; then
+        numdpfs=$((6*iters))
+    fi
 fi
 
-items=$((1<<size))
-
-echo ===== Running duoram $mode $size $iters $duoramtype $ramsize
+echo ===== Running duoram $mode $size $iters $phase $duoramtype $ramgb
 date "+===== Start %s %F %T"
 
     if [ "$4" = "online" -a "$5" = "3P" ]; then
@@ -80,11 +76,15 @@ date "+===== Start %s %F %T"
         savefilep0=$$.p0.out
         savefilep1=$$.p1.out
         savefilep2=$$.p2.out
-        docker exec -w /root/duoram/preprocessing duoram_p2 bash -c "$DUORAM_NUMA_P2 stdbuf -o 0  ./p2preprocessing $p0addr $p1addr $iters $size $modeval $ramsize > $savefilep2 2>&1" &
+        docker exec -w /root/duoram/duoram-online/preprocflags duoram_p0 bash -c "rm -f *" &
+        docker exec -w /root/duoram/duoram-online/preprocflags duoram_p1 bash -c "rm -f *" &
+        docker exec -w /root/duoram/duoram-online/preprocflags duoram_p2 bash -c "rm -f *" &
+        wait
+        docker exec -w /root/duoram/preprocessing duoram_p2 bash -c "$DUORAM_NUMA_P2 stdbuf -o 0  ./p2preprocessing $p0addr $p1addr $numdpfs $size $ramgb > $savefilep2 2>&1" &
         sleep 4
-        docker exec -w /root/duoram/preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1  $p0addr $p2addr $iters $size $modeval $ramsize > $savefilep1 2>&1" &
+        docker exec -w /root/duoram/preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1  $p0addr $p2addr $numdpfs $size $ramgb > $savefilep1 2>&1" &
         sleep 2
-        docker exec -w /root/duoram/preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0  $p1addr $p2addr $iters $size $modeval $ramsize > $savefilep0 2>&1" &
+        docker exec -w /root/duoram/preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0  $p1addr $p2addr $numdpfs $size $ramgb > $savefilep0 2>&1" &
         wait
         echo ===== P0 output
         docker exec -w /root/duoram/preprocessing duoram_p0 cat $savefilep0
@@ -127,9 +127,9 @@ date "+===== Start %s %F %T"
         docker exec -w /root/duoram/2p-preprocessing duoram_p0 ./OT $p0addr $p1addr 0  $((128*iters*size*3))  &
         sleep 2
         wait
-        docker exec -w /root/duoram/2p-preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1 $p0addr $iters $size $ramsize > $savefilep1 2>&1" &
+        docker exec -w /root/duoram/2p-preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1 $p0addr $iters $size $ramgb > $savefilep1 2>&1" &
         sleep 4
-        docker exec -w /root/duoram/2p-preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0 $p1addr $iters $size $ramsize > $savefilep0 2>&1" &
+        docker exec -w /root/duoram/2p-preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0 $p1addr $iters $size $ramgb > $savefilep0 2>&1" &
         wait
         echo ===== P0 output
         docker exec -w /root/duoram/2p-preprocessing duoram_p0 cat $savefilep0
@@ -156,9 +156,9 @@ date "+===== Start %s %F %T"
         docker exec -w /root/duoram/2p-preprocessing duoram_p0 ./OT $p0addr $p1addr 0  $((128*iters*size*3))  &
         sleep 2
         wait
-        docker exec -w /root/duoram/2p-preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1 $p0addr $iters $size $ramsize > $savefilep1 2>&1" &
+        docker exec -w /root/duoram/2p-preprocessing duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./preprocessing1 $p0addr $iters $size $ramgb > $savefilep1 2>&1" &
         sleep 4
-        docker exec -w /root/duoram/2p-preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0 $p1addr $iters $size $ramsize > $savefilep0 2>&1" &
+        docker exec -w /root/duoram/2p-preprocessing duoram_p0 bash -c "$DUORAM_NUMA_P0 stdbuf -o 0  ./preprocessing0 $p1addr $iters $size $ramgb > $savefilep0 2>&1" &
         wait
         docker exec -w /root/duoram/duoram-online duoram_p1 bash -c "$DUORAM_NUMA_P1 stdbuf -o 0  ./2Pduoram1 $p0addr $p2addr $size $iters $iters $iters 1  > $savefilep1 2>&1" &
         sleep 4
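
The modeval flag is gone: for a 3P preprocessing run the script now derives the number of DPFs from the access mode (iters by default, 3*iters for read or write, 6*iters for readwrite), and the last argument is now a RAM cap in GiB rather than a generic size. A hedged example invocation, assuming the positional order shown above (mode, size, iters, phase, type, RAM):

    # readwrite => numdpfs = 6*128 = 768 DPFs over a 2^20-item DB,
    # batched to stay under a 128 GiB RAM cap
    ./run-experiment.sh readwrite 20 128 preproc 3P 128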

+ 2 - 2
preprocessing/dpfgen.h

@@ -66,7 +66,7 @@ void compute_CW(cw_construction computecw_array, size_t ind, size_t layer,tcp::s
 
 	//exchange blinded shares for OSWAP.
   boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(BlindsCW)));
-  communication_cost += sizeof(__m128i);
+  communication_cost += sizeof(BlindsCW);
 
 	boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(BlindsCW)));
 	
@@ -90,7 +90,7 @@ void compute_CW(cw_construction computecw_array, size_t ind, size_t layer,tcp::s
 
  boost::asio::write(sout, boost::asio::buffer(&cwsent, sizeof(cwsent)));
  boost::asio::read(sout, boost::asio::buffer(&cwrecv, sizeof(cwrecv)));
- communication_cost += sizeof(__m128i);
+ communication_cost += sizeof(cwsent);
  cwrecv.cw ^= cwsent.cw;
  cwrecv.cwbit[0] ^= (cwsent.cwbit[0] ^ 1);
  cwrecv.cwbit[1] ^= (cwsent.cwbit[1]);

+ 14 - 17
preprocessing/p2preprocessing.cpp

@@ -82,12 +82,11 @@ int main(int argc, char* argv[])
  const size_t n_threads = atoi(argv[3]);
  const size_t number_of_sockets = 5 * n_threads;
  const size_t db_nitems = 1ULL << atoi(argv[4]);
- const size_t op = atoi(argv[5]);
- const size_t maxRAM = atoi(argv[6]);
+ const size_t maxRAM = atoi(argv[5]);
 
  std::cout << "maxRAM = "  << maxRAM << std::endl;
  size_t RAM_needed = 0;
- RAM_needed = n_threads *  9 * ((sizeof(__m128i) * db_nitems));
+ RAM_needed = n_threads *  164 * db_nitems;
  std::cout << "RAM needed = " << RAM_needed << " bytes = " << RAM_needed/1073741824 << " GiB" << std::endl;
  size_t n_batches = std::ceil(double(RAM_needed)/(1073741824 * maxRAM));
  std::cout << "n_batches = " << n_batches << std::endl;
@@ -136,6 +135,7 @@ int main(int argc, char* argv[])
 
 
 
+   auto start = std::chrono::steady_clock::now(); 
 
 	__m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
 	int8_t  ** flags0  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
@@ -200,10 +200,14 @@ int main(int argc, char* argv[])
 
   boost::asio::write(sockets0[0], boost::asio::buffer(&computecw0_array,  sizeof(computecw0_array)));
   boost::asio::write(sockets1[0], boost::asio::buffer(&computecw1_array,  sizeof(computecw1_array)));
+  communication_cost += sizeof(computecw0_array);
+  communication_cost += sizeof(computecw1_array);
 
   boost::asio::read(sockets0[0], boost::asio::buffer(dpf_instance0, n_threads * sizeof(dpfP2)));
   boost::asio::read(sockets1[0], boost::asio::buffer(dpf_instance1, n_threads * sizeof(dpfP2))); 
 
+     for(size_t iter = 0; iter < n_batches; ++iter)
+     { 
   boost::asio::thread_pool pool(thread_per_batch); 
     for(size_t j = 0; j < thread_per_batch; ++j)
     {
@@ -212,21 +216,8 @@ int main(int argc, char* argv[])
     }  
 
  pool.join();
+     }
  
- if(op == 1)
- {
-    boost::asio::thread_pool pool(thread_per_batch); 
-    for(size_t j = 0; j < thread_per_batch; ++j)
-    {
-     boost::asio::post(pool, std::bind(mpc_gen,  std::ref(depth), std::ref(aeskey), db_nitems, n_threads,  std::ref(sockets0), std::ref(sockets1), 
-                                     output0, flags0,  output1, flags1,  std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));    
-    }  
-
-   pool.join();
- }
-
-
-
   boost::asio::thread_pool pool3(thread_per_batch); 
   
  int64_t ** leaves0    = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
@@ -282,6 +273,12 @@ int main(int argc, char* argv[])
    }
   #endif
   
+    auto end = std::chrono::steady_clock::now();
+    std::chrono::duration<double> elapsed_seconds = end-start;
+    //std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n";
+    std::cout << "WallClockTime: "  << elapsed_seconds.count() << std::endl;
+
+    std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
   return 0;
 }
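
Since the op argument was dropped, direct invocations of the preprocessing binaries lose one positional parameter (the host addresses below are illustrative, not from the source):

    # was: ./p2preprocessing <p0addr> <p1addr> <n_threads> <log_db_size> <op> <maxRAM_GiB>
    # now (run-experiment.sh passes numdpfs as the third argument):
    ./p2preprocessing 172.17.0.2 172.17.0.3 768 20 128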
 

+ 3 - 23
preprocessing/preprocessing.cpp

@@ -52,8 +52,7 @@ int main(int argc, char * argv[])
    const std::string host2 = (argc < 3) ? "127.0.0.1" : argv[2];
    const size_t n_threads = atoi(argv[3]);
    const size_t expo = atoi(argv[4]);
-   const size_t op = atoi(argv[5]);
-   const size_t maxRAM = atoi(argv[6]);
+   const size_t maxRAM = atoi(argv[5]);
    //std::cout << "n_threads = " << n_threads << std::endl;
  
 
@@ -71,7 +70,7 @@ int main(int argc, char * argv[])
    const size_t db_nitems = 1ULL << atoi(argv[4]);
       //std::cout << "maxRAM = "  << maxRAM << std::endl;
       size_t RAM_needed = 0;
-      RAM_needed = n_threads *  9 * ((sizeof(__m128i) * db_nitems));
+      RAM_needed = n_threads * 164 * db_nitems;
       //std::cout << "RAM needed = " << RAM_needed << " bytes = " << RAM_needed/1073741824 << " GiB" << std::endl;
        size_t n_batches = std::ceil(double(RAM_needed)/(1073741824 * maxRAM));
       //std::cout << "n_batches = " << n_batches << std::endl;
@@ -121,7 +120,6 @@ int main(int argc, char * argv[])
 
  
      boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));
-     communication_cost += sizeof(computecw_array);
      #ifdef VERBOSE
       std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;
      #endif
@@ -142,24 +140,6 @@ int main(int argc, char * argv[])
         pool.join();  
      }
       
-     bool interleaved = false;
-      
-     if(op == 1) interleaved = true;
-      
-     if(interleaved)
-     {
-      for(size_t iter = 0; iter < n_batches; ++iter)
-      { 
-        boost::asio::thread_pool pool2(thread_per_batch);
-        for(size_t j = 0; j < thread_per_batch; ++j)
-        {
-         boost::asio::post(pool2,  std::bind(create_dpfs, reading,  db_nitems,	std::ref    (aeskey),  target_share_read[j],  std::ref(socketsPb), std::ref(socketsP2), 0, db_nitems-1, 
-                                             output[j],  flags[j], std::ref(final_correction_word[j]), computecw_array, std::ref(dpf_instance),  party, 5 * j, j));	 	  
-        }
-         pool2.join();  
-       }
-      }
-     
      boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.
      communication_cost += (n_threads * sizeof(dpfP2));
  
@@ -238,7 +218,7 @@ int main(int argc, char * argv[])
  
     // std::cout << "elapsed_ FIO = " << elapsed_seconds.count() << std::endl;
 
-    std::cout << "CommunicationCost: " << communication_cost/1024 << " KiB" << std::endl;
+    std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
     #ifdef VERBOSE
      for(size_t j = 0; j < n_threads; ++j)
      {