1 year ago · 51cf07751d
--- a/src/aligned_memory_mt.rs
+++ b/src/aligned_memory_mt.rs
@@ -1,120 +0,0 @@
 
															-#![allow(dead_code)]
														
 
															-/* This file is almost identical to the aligned_memory.rs file in the
														
 
															-   spiral-rs crate.  The name is modified from AlignedMemory to
														
 
															-   AlignedMemoryMT, and there is one (unsafe!) change to the API:
														
 
															-
														
 
															-    pub unsafe fn as_mut_ptr(&mut self) -> *mut u64
														
 
															-
														
 
															-   has changed to:
														
 
															-
														
 
															-    pub unsafe fn as_mut_ptr(&self) -> *mut u64
														
 
															-
														
 
															-   The reason for this change is explicitly to allow multiple threads to
														
 
															-   *write* into the memory pool concurrently, with the caveat that the
														
 
															-   threads *must not* try to write into the same memory location.  In
														
 
															-   Spiral, each polynomial created from the database ends up scattered
														
 
															-   into noncontiguous words of memory, but any one word still only comes
														
 
															-   from one polynomial.  So with this change, different threads can read
														
 
															-   different parts of the database to produce different polynomials, and
														
 
															-   write those polynomials into the same memory pool (but *not* the same
														
 
															-   memory locations) at the same time.
														
 
															-*/
														
 
															-
														
 
															-use std::{
														
 
															-    alloc::{alloc_zeroed, dealloc, Layout},
														
 
															-    mem::size_of,
														
 
															-    ops::{Index, IndexMut},
														
 
															-    slice::{from_raw_parts, from_raw_parts_mut},
														
 
															-};
														
 
															-
														
 
															-const ALIGN_SIMD: usize = 64; // enough to support AVX-512
														
 
															-pub type AlignedMemoryMT64 = AlignedMemoryMT<ALIGN_SIMD>;
														
 
															-
														
 
															-pub struct AlignedMemoryMT<const ALIGN: usize> {
														
 
															-    p: *mut u64,
														
 
															-    sz_u64: usize,
														
 
															-    layout: Layout,
														
 
															-}
														
 
															-
														
 
															-impl<const ALIGN: usize> AlignedMemoryMT<{ ALIGN }> {
														
 
															-    pub fn new(sz_u64: usize) -> Self {
														
 
															-        let sz_bytes = sz_u64 * size_of::<u64>();
														
 
															-        let layout = Layout::from_size_align(sz_bytes, ALIGN).unwrap();
														
 
															-
														
 
															-        let ptr;
														
 
															-        unsafe {
														
 
															-            ptr = alloc_zeroed(layout);
														
 
															-        }
														
 
															-
														
 
															-        Self {
														
 
															-            p: ptr as *mut u64,
														
 
															-            sz_u64,
														
 
															-            layout,
														
 
															-        }
														
 
															-    }
														
 
															-
														
 
															-    // pub fn from(data: &[u8]) -> Self {
														
 
															-    //     let sz_u64 = (data.len() + size_of::<u64>() - 1) / size_of::<u64>();
														
 
															-    //     let mut out = Self::new(sz_u64);
														
 
															-    //     let out_slice = out.as_mut_slice();
														
 
															-    //     let mut i = 0;
														
 
															-    //     for chunk in data.chunks(size_of::<u64>()) {
														
 
															-    //         out_slice[i] = u64::from_ne_bytes(chunk);
														
 
															-    //         i += 1;
														
 
															-    //     }
														
 
															-    //     out
														
 
															-    // }
														
 
															-
														
 
															-    pub fn as_slice(&self) -> &[u64] {
														
 
															-        unsafe { from_raw_parts(self.p, self.sz_u64) }
														
 
															-    }
														
 
															-
														
 
															-    pub fn as_mut_slice(&mut self) -> &mut [u64] {
														
 
															-        unsafe { from_raw_parts_mut(self.p, self.sz_u64) }
														
 
															-    }
														
 
															-
														
 
															-    pub unsafe fn as_ptr(&self) -> *const u64 {
														
 
															-        self.p
														
 
															-    }
														
 
															-
														
 
															-    pub unsafe fn as_mut_ptr(&self) -> *mut u64 {
														
 
															-        self.p
														
 
															-    }
														
 
															-
														
 
															-    pub fn len(&self) -> usize {
														
 
															-        self.sz_u64
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-unsafe impl<const ALIGN: usize> Send for AlignedMemoryMT<{ ALIGN }> {}
														
 
															-unsafe impl<const ALIGN: usize> Sync for AlignedMemoryMT<{ ALIGN }> {}
														
 
															-
														
 
															-impl<const ALIGN: usize> Drop for AlignedMemoryMT<{ ALIGN }> {
														
 
															-    fn drop(&mut self) {
														
 
															-        unsafe {
														
 
															-            dealloc(self.p as *mut u8, self.layout);
														
 
															-        }
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-impl<const ALIGN: usize> Index<usize> for AlignedMemoryMT<{ ALIGN }> {
														
 
															-    type Output = u64;
														
 
															-
														
 
															-    fn index(&self, index: usize) -> &Self::Output {
														
 
															-        &self.as_slice()[index]
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-impl<const ALIGN: usize> IndexMut<usize> for AlignedMemoryMT<{ ALIGN }> {
														
 
															-    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
														
 
															-        &mut self.as_mut_slice()[index]
														
 
															-    }
														
 
															-}
														
 
															-
														
 
															-impl<const ALIGN: usize> Clone for AlignedMemoryMT<{ ALIGN }> {
														
 
															-    fn clone(&self) -> Self {
														
 
															-        let mut out = Self::new(self.sz_u64);
														
 
															-        out.as_mut_slice().copy_from_slice(self.as_slice());
														
 
															-        out
														
 
															-    }
														
 
															-}
														
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,3 @@
 
															-mod aligned_memory_mt;
														
 
															 pub mod client;
														
 
															 mod ot;
														
 
															 mod params;
														
--- a/src/spiral_mt.rs
+++ b/src/spiral_mt.rs
@@ -1,3 +1,4 @@
 
															+use spiral_rs::aligned_memory::*;
														
 
															 use spiral_rs::arith::*;
														
 
															 use spiral_rs::params::*;
														
 
															 use spiral_rs::poly::*;
														
@@ -6,8 +7,6 @@ use spiral_rs::util::*;
 
															 use rayon::scope;
														
 
															-use crate::aligned_memory_mt::*;
														
 
															-
														
 
															 pub fn load_item_from_slice<'a>(
														
 
															     params: &'a Params,
														
 
															     slice: &[u8],
														
@@ -46,18 +45,29 @@ pub fn load_item_from_slice<'a>(
 
															     out
														
 
															 }
														
 
															-pub fn load_db_from_slice_mt(
														
 
															-    params: &Params,
														
 
															-    slice: &[u8],
														
 
															-    num_threads: usize,
														
 
															-) -> AlignedMemoryMT64 {
														
 
															+pub fn load_db_from_slice_mt(params: &Params, slice: &[u8], num_threads: usize) -> AlignedMemory64 {
														
 
															     let instances = params.instances;
														
 
															     let trials = params.n * params.n;
														
 
															     let dim0 = 1 << params.db_dim_1;
														
 
															     let num_per = 1 << params.db_dim_2;
														
 
															     let num_items = dim0 * num_per;
														
 
															     let db_size_words = instances * trials * num_items * params.poly_len;
														
 
															-    let v: AlignedMemoryMT64 = AlignedMemoryMT64::new(db_size_words);
														
 
															+    let mut v: AlignedMemory64 = AlignedMemory64::new(db_size_words);
														
 
															+
														
 
															+    // Get a pointer to the memory pool of the AlignedMemory64.  We
														
 
															+    // treat it as a usize explicitly so we can pass the same pointer to
														
 
															+    // multiple threads, each of which will cast it to a *mut u64, in
														
 
															+    // order to *write* into the memory pool concurrently. There is a
														
 
															+    // caveat that the threads *must not* try to write into the same
														
 
															+    // memory location.  In Spiral, each polynomial created from the
														
 
															+    // database ends up scattered into noncontiguous words of memory,
														
 
															+    // but any one word still only comes from one polynomial.  So with
														
 
															+    // this mechanism, different threads can read different parts of the
														
 
															+    // database to produce different polynomials, and write those
														
 
															+    // polynomials into the same memory pool (but *not* the same memory
														
 
															+    // locations) at the same time.
														
 
															+
														
 
															+    let vptrusize = unsafe { v.as_mut_ptr() as usize };
														
 
															     for instance in 0..instances {
														
 
															         for trial in 0..trials {
														
@@ -69,13 +79,12 @@ pub fn load_db_from_slice_mt(
 
															                     let items_this_thread =
														
 
															                         items_per_thread_base + if thr < items_per_thread_extra { 1 } else { 0 };
														
 
															                     let item_thread_end = item_thread_start + items_this_thread;
														
 
															-                    let v = &v;
														
 
															                     s.spawn(move |_| {
														
 
															-                        let vptr = unsafe { v.as_mut_ptr() };
														
 
															+                        let vptr = vptrusize as *mut u64;
														
 
															                         for i in item_thread_start..item_thread_end {
														
 
															                             // Swap the halves of the item index so that
														
 
															                             // the polynomials based on the items are
														
 
															-                            // written to the AlignedMemoryMT64 more
														
 
															+                            // written to the AlignedMemory64 more
														
 
															                             // sequentially
														
 
															                             let ii = i / dim0;
														
 
															                             let j = i % dim0;