
A simple cast can eliminate the need for the AlignedMemoryMT type

Ian Goldberg 1 year ago
parent commit 51cf07751d
3 changed files with 20 additions and 132 deletions
  1. src/aligned_memory_mt.rs  +0 -120
  2. src/lib.rs  +0 -1
  3. src/spiral_mt.rs  +20 -11

+ 0 - 120
src/aligned_memory_mt.rs

@@ -1,120 +0,0 @@
-#![allow(dead_code)]
-/* This file is almost identical to the aligned_memory.rs file in the
-   spiral-rs crate.  The name is modified from AlignedMemory to
-   AlignedMemoryMT, and there is one (unsafe!) change to the API:
-
-    pub unsafe fn as_mut_ptr(&mut self) -> *mut u64
-
-   has changed to:
-
-    pub unsafe fn as_mut_ptr(&self) -> *mut u64
-
-   The reason for this change is explicitly to allow multiple threads to
-   *write* into the memory pool concurrently, with the caveat that the
-   threads *must not* try to write into the same memory location.  In
-   Spiral, each polynomial created from the database ends up scattered
-   into noncontiguous words of memory, but any one word still only comes
-   from one polynomial.  So with this change, different threads can read
-   different parts of the database to produce different polynomials, and
-   write those polynomials into the same memory pool (but *not* the same
-   memory locations) at the same time.
-*/
-
-use std::{
-    alloc::{alloc_zeroed, dealloc, Layout},
-    mem::size_of,
-    ops::{Index, IndexMut},
-    slice::{from_raw_parts, from_raw_parts_mut},
-};
-
-const ALIGN_SIMD: usize = 64; // enough to support AVX-512
-pub type AlignedMemoryMT64 = AlignedMemoryMT<ALIGN_SIMD>;
-
-pub struct AlignedMemoryMT<const ALIGN: usize> {
-    p: *mut u64,
-    sz_u64: usize,
-    layout: Layout,
-}
-
-impl<const ALIGN: usize> AlignedMemoryMT<{ ALIGN }> {
-    pub fn new(sz_u64: usize) -> Self {
-        let sz_bytes = sz_u64 * size_of::<u64>();
-        let layout = Layout::from_size_align(sz_bytes, ALIGN).unwrap();
-
-        let ptr;
-        unsafe {
-            ptr = alloc_zeroed(layout);
-        }
-
-        Self {
-            p: ptr as *mut u64,
-            sz_u64,
-            layout,
-        }
-    }
-
-    // pub fn from(data: &[u8]) -> Self {
-    //     let sz_u64 = (data.len() + size_of::<u64>() - 1) / size_of::<u64>();
-    //     let mut out = Self::new(sz_u64);
-    //     let out_slice = out.as_mut_slice();
-    //     let mut i = 0;
-    //     for chunk in data.chunks(size_of::<u64>()) {
-    //         out_slice[i] = u64::from_ne_bytes(chunk);
-    //         i += 1;
-    //     }
-    //     out
-    // }
-
-    pub fn as_slice(&self) -> &[u64] {
-        unsafe { from_raw_parts(self.p, self.sz_u64) }
-    }
-
-    pub fn as_mut_slice(&mut self) -> &mut [u64] {
-        unsafe { from_raw_parts_mut(self.p, self.sz_u64) }
-    }
-
-    pub unsafe fn as_ptr(&self) -> *const u64 {
-        self.p
-    }
-
-    pub unsafe fn as_mut_ptr(&self) -> *mut u64 {
-        self.p
-    }
-
-    pub fn len(&self) -> usize {
-        self.sz_u64
-    }
-}
-
-unsafe impl<const ALIGN: usize> Send for AlignedMemoryMT<{ ALIGN }> {}
-unsafe impl<const ALIGN: usize> Sync for AlignedMemoryMT<{ ALIGN }> {}
-
-impl<const ALIGN: usize> Drop for AlignedMemoryMT<{ ALIGN }> {
-    fn drop(&mut self) {
-        unsafe {
-            dealloc(self.p as *mut u8, self.layout);
-        }
-    }
-}
-
-impl<const ALIGN: usize> Index<usize> for AlignedMemoryMT<{ ALIGN }> {
-    type Output = u64;
-
-    fn index(&self, index: usize) -> &Self::Output {
-        &self.as_slice()[index]
-    }
-}
-
-impl<const ALIGN: usize> IndexMut<usize> for AlignedMemoryMT<{ ALIGN }> {
-    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
-        &mut self.as_mut_slice()[index]
-    }
-}
-
-impl<const ALIGN: usize> Clone for AlignedMemoryMT<{ ALIGN }> {
-    fn clone(&self) -> Self {
-        let mut out = Self::new(self.sz_u64);
-        out.as_mut_slice().copy_from_slice(self.as_slice());
-        out
-    }
-}
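
For context, here is a minimal, hypothetical sketch (not part of the crate; MtBuf is a made-up name) of the pattern the deleted file implemented: a raw, aligned, zeroed allocation unsafely declared Send + Sync, with as_mut_ptr taking &self rather than &mut self, so that scoped threads can concurrently write to disjoint words of the same buffer.

```rust
use std::alloc::{alloc_zeroed, dealloc, Layout};
use std::thread;

// Hypothetical miniature of the deleted AlignedMemoryMT.
struct MtBuf {
    p: *mut u64,
    len: usize,
    layout: Layout,
}

// SAFETY: sound only under the caveat in the comment above: no two
// threads may ever write to the same word.
unsafe impl Send for MtBuf {}
unsafe impl Sync for MtBuf {}

impl MtBuf {
    fn new(len: usize) -> Self {
        let layout =
            Layout::from_size_align(len * std::mem::size_of::<u64>(), 64).unwrap();
        let p = unsafe { alloc_zeroed(layout) } as *mut u64;
        Self { p, len, layout }
    }

    // Takes &self, not &mut self: the one unsafe API change the
    // deleted file's comment describes.
    unsafe fn as_mut_ptr(&self) -> *mut u64 {
        self.p
    }
}

impl Drop for MtBuf {
    fn drop(&mut self) {
        unsafe { dealloc(self.p as *mut u8, self.layout) }
    }
}

fn main() {
    let buf = MtBuf::new(8);
    thread::scope(|s| {
        for t in 0..2usize {
            let buf = &buf;
            s.spawn(move || {
                let p = unsafe { buf.as_mut_ptr() };
                // Each thread writes its own disjoint half of the buffer.
                for i in t * 4..(t + 1) * 4 {
                    unsafe { p.add(i).write(i as u64) };
                }
            });
        }
    });
    let sum: u64 = (0..buf.len).map(|i| unsafe { *buf.p.add(i) }).sum();
    assert_eq!(sum, 28); // 0 + 1 + ... + 7
}
```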

+ 0 - 1
src/lib.rs

@@ -1,4 +1,3 @@
-mod aligned_memory_mt;
 pub mod client;
 mod ot;
 mod params;

+ 20 - 11
src/spiral_mt.rs

@@ -1,3 +1,4 @@
+use spiral_rs::aligned_memory::*;
 use spiral_rs::arith::*;
 use spiral_rs::params::*;
 use spiral_rs::poly::*;
@@ -6,8 +7,6 @@ use spiral_rs::util::*;
 
 use rayon::scope;
 
-use crate::aligned_memory_mt::*;
-
 pub fn load_item_from_slice<'a>(
     params: &'a Params,
     slice: &[u8],
@@ -46,18 +45,29 @@ pub fn load_item_from_slice<'a>(
     out
 }
 
-pub fn load_db_from_slice_mt(
-    params: &Params,
-    slice: &[u8],
-    num_threads: usize,
-) -> AlignedMemoryMT64 {
+pub fn load_db_from_slice_mt(params: &Params, slice: &[u8], num_threads: usize) -> AlignedMemory64 {
     let instances = params.instances;
     let trials = params.n * params.n;
     let dim0 = 1 << params.db_dim_1;
     let num_per = 1 << params.db_dim_2;
     let num_items = dim0 * num_per;
     let db_size_words = instances * trials * num_items * params.poly_len;
-    let v: AlignedMemoryMT64 = AlignedMemoryMT64::new(db_size_words);
+    let mut v: AlignedMemory64 = AlignedMemory64::new(db_size_words);
+
+    // Get a pointer to the memory pool of the AlignedMemory64.  We
+    // treat it as a usize explicitly so we can pass the same pointer to
+    // multiple threads, each of which will cast it to a *mut u64, in
+    // order to *write* into the memory pool concurrently. There is a
+    // caveat that the threads *must not* try to write into the same
+    // memory location.  In Spiral, each polynomial created from the
+    // database ends up scattered into noncontiguous words of memory,
+    // but any one word still only comes from one polynomial.  So with
+    // this mechanism, different threads can read different parts of the
+    // database to produce different polynomials, and write those
+    // polynomials into the same memory pool (but *not* the same memory
+    // locations) at the same time.
+
+    let vptrusize = unsafe { v.as_mut_ptr() as usize };
 
     for instance in 0..instances {
         for trial in 0..trials {
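
The core trick of this hunk, in isolation: because a usize is Copy + Send, the pointer can be handed to any number of threads with no borrow of the buffer at all, and each thread casts it back to *mut u64. A minimal sketch (hypothetical names, std scoped threads rather than rayon) of the same mechanism:

```rust
use std::thread;

fn main() {
    let mut v = vec![0u64; 8];

    // Round-trip the pointer through usize so it can be moved into
    // multiple thread closures without borrowing `v`.
    let vptrusize = v.as_mut_ptr() as usize;

    thread::scope(|s| {
        for t in 0..2usize {
            s.spawn(move || {
                let vptr = vptrusize as *mut u64;
                // SAFETY: the threads write disjoint halves; no word is
                // ever touched by more than one thread, matching the
                // caveat in the comment above.
                for i in t * 4..(t + 1) * 4 {
                    unsafe { vptr.add(i).write(i as u64) };
                }
            });
        }
    });

    assert_eq!(v, (0..8u64).collect::<Vec<_>>());
}
```

This is why the whole wrapper type became unnecessary: the cast, not a custom Send/Sync impl, is what lets the same mutable buffer reach every thread.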
@@ -69,13 +79,12 @@ pub fn load_db_from_slice_mt(
                     let items_this_thread =
                         items_per_thread_base + if thr < items_per_thread_extra { 1 } else { 0 };
                     let item_thread_end = item_thread_start + items_this_thread;
-                    let v = &v;
                     s.spawn(move |_| {
-                        let vptr = unsafe { v.as_mut_ptr() };
+                        let vptr = vptrusize as *mut u64;
                         for i in item_thread_start..item_thread_end {
                             // Swap the halves of the item index so that
                             // the polynomials based on the items are
-                            // written to the AlignedMemoryMT64 more
+                            // written to the AlignedMemory64 more
                             // sequentially
                             let ii = i / dim0;
                             let j = i % dim0;