
A simple cast can eliminate the need for the AlignedMemoryMT type

Ian Goldberg 1 year ago
parent commit 51cf07751d
3 changed files with 20 additions and 132 deletions
  1. src/aligned_memory_mt.rs  +0 -120
  2. src/lib.rs  +0 -1
  3. src/spiral_mt.rs  +20 -11

+ 0 - 120
src/aligned_memory_mt.rs

@@ -1,120 +0,0 @@
-#![allow(dead_code)]
-/* This file is almost identical to the aligned_memory.rs file in the
-   spiral-rs crate.  The name is modified from AlignedMemory to
-   AlignedMemoryMT, and there is one (unsafe!) change to the API:
-
-    pub unsafe fn as_mut_ptr(&mut self) -> *mut u64
-
-   has changed to:
-
-    pub unsafe fn as_mut_ptr(&self) -> *mut u64
-
-   The reason for this change is explicitly to allow multiple threads to
-   *write* into the memory pool concurrently, with the caveat that the
-   threads *must not* try to write into the same memory location.  In
-   Spiral, each polynomial created from the database ends up scattered
-   into noncontiguous words of memory, but any one word still only comes
-   from one polynomial.  So with this change, different threads can read
-   different parts of the database to produce different polynomials, and
-   write those polynomials into the same memory pool (but *not* the same
-   memory locations) at the same time.
-*/
-
-use std::{
-    alloc::{alloc_zeroed, dealloc, Layout},
-    mem::size_of,
-    ops::{Index, IndexMut},
-    slice::{from_raw_parts, from_raw_parts_mut},
-};
-
-const ALIGN_SIMD: usize = 64; // enough to support AVX-512
-pub type AlignedMemoryMT64 = AlignedMemoryMT<ALIGN_SIMD>;
-
-pub struct AlignedMemoryMT<const ALIGN: usize> {
-    p: *mut u64,
-    sz_u64: usize,
-    layout: Layout,
-}
-
-impl<const ALIGN: usize> AlignedMemoryMT<{ ALIGN }> {
-    pub fn new(sz_u64: usize) -> Self {
-        let sz_bytes = sz_u64 * size_of::<u64>();
-        let layout = Layout::from_size_align(sz_bytes, ALIGN).unwrap();
-
-        let ptr;
-        unsafe {
-            ptr = alloc_zeroed(layout);
-        }
-
-        Self {
-            p: ptr as *mut u64,
-            sz_u64,
-            layout,
-        }
-    }
-
-    // pub fn from(data: &[u8]) -> Self {
-    //     let sz_u64 = (data.len() + size_of::<u64>() - 1) / size_of::<u64>();
-    //     let mut out = Self::new(sz_u64);
-    //     let out_slice = out.as_mut_slice();
-    //     let mut i = 0;
-    //     for chunk in data.chunks(size_of::<u64>()) {
-    //         out_slice[i] = u64::from_ne_bytes(chunk);
-    //         i += 1;
-    //     }
-    //     out
-    // }
-
-    pub fn as_slice(&self) -> &[u64] {
-        unsafe { from_raw_parts(self.p, self.sz_u64) }
-    }
-
-    pub fn as_mut_slice(&mut self) -> &mut [u64] {
-        unsafe { from_raw_parts_mut(self.p, self.sz_u64) }
-    }
-
-    pub unsafe fn as_ptr(&self) -> *const u64 {
-        self.p
-    }
-
-    pub unsafe fn as_mut_ptr(&self) -> *mut u64 {
-        self.p
-    }
-
-    pub fn len(&self) -> usize {
-        self.sz_u64
-    }
-}
-
-unsafe impl<const ALIGN: usize> Send for AlignedMemoryMT<{ ALIGN }> {}
-unsafe impl<const ALIGN: usize> Sync for AlignedMemoryMT<{ ALIGN }> {}
-
-impl<const ALIGN: usize> Drop for AlignedMemoryMT<{ ALIGN }> {
-    fn drop(&mut self) {
-        unsafe {
-            dealloc(self.p as *mut u8, self.layout);
-        }
-    }
-}
-
-impl<const ALIGN: usize> Index<usize> for AlignedMemoryMT<{ ALIGN }> {
-    type Output = u64;
-
-    fn index(&self, index: usize) -> &Self::Output {
-        &self.as_slice()[index]
-    }
-}
-
-impl<const ALIGN: usize> IndexMut<usize> for AlignedMemoryMT<{ ALIGN }> {
-    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
-        &mut self.as_mut_slice()[index]
-    }
-}
-
-impl<const ALIGN: usize> Clone for AlignedMemoryMT<{ ALIGN }> {
-    fn clone(&self) -> Self {
-        let mut out = Self::new(self.sz_u64);
-        out.as_mut_slice().copy_from_slice(self.as_slice());
-        out
-    }
-}
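
For context, here is a minimal, hypothetical sketch (not part of the crate; MtBuf is a made-up name) of the pattern the deleted file implemented: a raw, aligned, zeroed allocation unsafely declared Send + Sync, with as_mut_ptr taking &self rather than &mut self, so that scoped threads can concurrently write to disjoint words of the same buffer.

```rust
use std::alloc::{alloc_zeroed, dealloc, Layout};
use std::thread;

// Hypothetical miniature of the deleted AlignedMemoryMT.
struct MtBuf {
    p: *mut u64,
    len: usize,
    layout: Layout,
}

// SAFETY: sound only under the caveat in the comment above: no two
// threads may ever write to the same word.
unsafe impl Send for MtBuf {}
unsafe impl Sync for MtBuf {}

impl MtBuf {
    fn new(len: usize) -> Self {
        let layout =
            Layout::from_size_align(len * std::mem::size_of::<u64>(), 64).unwrap();
        let p = unsafe { alloc_zeroed(layout) } as *mut u64;
        Self { p, len, layout }
    }

    // Takes &self, not &mut self: the one unsafe API change the
    // deleted file's comment describes.
    unsafe fn as_mut_ptr(&self) -> *mut u64 {
        self.p
    }
}

impl Drop for MtBuf {
    fn drop(&mut self) {
        unsafe { dealloc(self.p as *mut u8, self.layout) }
    }
}

fn main() {
    let buf = MtBuf::new(8);
    thread::scope(|s| {
        for t in 0..2usize {
            let buf = &buf;
            s.spawn(move || {
                let p = unsafe { buf.as_mut_ptr() };
                // Each thread writes its own disjoint half of the buffer.
                for i in t * 4..(t + 1) * 4 {
                    unsafe { p.add(i).write(i as u64) };
                }
            });
        }
    });
    let sum: u64 = (0..buf.len).map(|i| unsafe { *buf.p.add(i) }).sum();
    assert_eq!(sum, 28); // 0 + 1 + ... + 7
}
```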

+ 0 - 1
src/lib.rs

@@ -1,4 +1,3 @@
-mod aligned_memory_mt;
 pub mod client;
 mod ot;
 mod params;

+ 20 - 11
src/spiral_mt.rs

@@ -1,3 +1,4 @@
+use spiral_rs::aligned_memory::*;
 use spiral_rs::arith::*;
 use spiral_rs::params::*;
 use spiral_rs::poly::*;
@@ -6,8 +7,6 @@ use spiral_rs::util::*;
 
 use rayon::scope;
 
-use crate::aligned_memory_mt::*;
-
 pub fn load_item_from_slice<'a>(
     params: &'a Params,
     slice: &[u8],
@@ -46,18 +45,29 @@ pub fn load_item_from_slice<'a>(
     out
 }
 
-pub fn load_db_from_slice_mt(
-    params: &Params,
-    slice: &[u8],
-    num_threads: usize,
-) -> AlignedMemoryMT64 {
+pub fn load_db_from_slice_mt(params: &Params, slice: &[u8], num_threads: usize) -> AlignedMemory64 {
     let instances = params.instances;
     let trials = params.n * params.n;
     let dim0 = 1 << params.db_dim_1;
     let num_per = 1 << params.db_dim_2;
     let num_items = dim0 * num_per;
     let db_size_words = instances * trials * num_items * params.poly_len;
-    let v: AlignedMemoryMT64 = AlignedMemoryMT64::new(db_size_words);
+    let mut v: AlignedMemory64 = AlignedMemory64::new(db_size_words);
+
+    // Get a pointer to the memory pool of the AlignedMemory64.  We
+    // treat it as a usize explicitly so we can pass the same pointer to
+    // multiple threads, each of which will cast it to a *mut u64, in
+    // order to *write* into the memory pool concurrently. There is a
+    // caveat that the threads *must not* try to write into the same
+    // memory location.  In Spiral, each polynomial created from the
+    // database ends up scattered into noncontiguous words of memory,
+    // but any one word still only comes from one polynomial.  So with
+    // this mechanism, different threads can read different parts of the
+    // database to produce different polynomials, and write those
+    // polynomials into the same memory pool (but *not* the same memory
+    // locations) at the same time.
+
+    let vptrusize = unsafe { v.as_mut_ptr() as usize };
 
     for instance in 0..instances {
         for trial in 0..trials {
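
The core trick of this hunk, in isolation: because a usize is Copy + Send, the pointer can be handed to any number of threads with no borrow of the buffer at all, and each thread casts it back to *mut u64. A minimal sketch (hypothetical names, std scoped threads rather than rayon) of the same mechanism:

```rust
use std::thread;

fn main() {
    let mut v = vec![0u64; 8];

    // Round-trip the pointer through usize so it can be moved into
    // multiple thread closures without borrowing `v`.
    let vptrusize = v.as_mut_ptr() as usize;

    thread::scope(|s| {
        for t in 0..2usize {
            s.spawn(move || {
                let vptr = vptrusize as *mut u64;
                // SAFETY: the threads write disjoint halves; no word is
                // ever touched by more than one thread, matching the
                // caveat in the comment above.
                for i in t * 4..(t + 1) * 4 {
                    unsafe { vptr.add(i).write(i as u64) };
                }
            });
        }
    });

    assert_eq!(v, (0..8u64).collect::<Vec<_>>());
}
```

This is why the whole wrapper type became unnecessary: the cast, not a custom Send/Sync impl, is what lets the same mutable buffer reach every thread.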
@@ -69,13 +79,12 @@ pub fn load_db_from_slice_mt(
                     let items_this_thread =
                         items_per_thread_base + if thr < items_per_thread_extra { 1 } else { 0 };
                     let item_thread_end = item_thread_start + items_this_thread;
-                    let v = &v;
                     s.spawn(move |_| {
-                        let vptr = unsafe { v.as_mut_ptr() };
+                        let vptr = vptrusize as *mut u64;
                         for i in item_thread_start..item_thread_end {
                             // Swap the halves of the item index so that
                             // the polynomials based on the items are
-                            // written to the AlignedMemoryMT64 more
+                            // written to the AlignedMemory64 more
                             // sequentially
                             let ii = i / dim0;
                             let j = i % dim0;