|
@@ -1,3 +1,4 @@
|
|
|
|
+use spiral_rs::aligned_memory::*;
|
|
use spiral_rs::arith::*;
|
|
use spiral_rs::arith::*;
|
|
use spiral_rs::params::*;
|
|
use spiral_rs::params::*;
|
|
use spiral_rs::poly::*;
|
|
use spiral_rs::poly::*;
|
|
@@ -6,8 +7,6 @@ use spiral_rs::util::*;
|
|
|
|
|
|
use rayon::scope;
|
|
use rayon::scope;
|
|
|
|
|
|
-use crate::aligned_memory_mt::*;
|
|
|
|
-
|
|
|
|
pub fn load_item_from_slice<'a>(
|
|
pub fn load_item_from_slice<'a>(
|
|
params: &'a Params,
|
|
params: &'a Params,
|
|
slice: &[u8],
|
|
slice: &[u8],
|
|
@@ -46,18 +45,29 @@ pub fn load_item_from_slice<'a>(
|
|
out
|
|
out
|
|
}
|
|
}
|
|
|
|
|
|
-pub fn load_db_from_slice_mt(
|
|
|
|
- params: &Params,
|
|
|
|
- slice: &[u8],
|
|
|
|
- num_threads: usize,
|
|
|
|
-) -> AlignedMemoryMT64 {
|
|
|
|
|
|
+pub fn load_db_from_slice_mt(params: &Params, slice: &[u8], num_threads: usize) -> AlignedMemory64 {
|
|
let instances = params.instances;
|
|
let instances = params.instances;
|
|
let trials = params.n * params.n;
|
|
let trials = params.n * params.n;
|
|
let dim0 = 1 << params.db_dim_1;
|
|
let dim0 = 1 << params.db_dim_1;
|
|
let num_per = 1 << params.db_dim_2;
|
|
let num_per = 1 << params.db_dim_2;
|
|
let num_items = dim0 * num_per;
|
|
let num_items = dim0 * num_per;
|
|
let db_size_words = instances * trials * num_items * params.poly_len;
|
|
let db_size_words = instances * trials * num_items * params.poly_len;
|
|
- let v: AlignedMemoryMT64 = AlignedMemoryMT64::new(db_size_words);
|
|
|
|
|
|
+ let mut v: AlignedMemory64 = AlignedMemory64::new(db_size_words);
|
|
|
|
+
|
|
|
|
+ // Get a pointer to the memory pool of the AlignedMemory64. We
|
|
|
|
+ // treat it as a usize explicitly (a raw pointer is not Send) so we can pass the same pointer to
|
|
|
|
+ // multiple threads, each of which will cast it to a *mut u64, in
|
|
|
|
+ // order to *write* into the memory pool concurrently. There is a
|
|
|
|
+ // caveat that the threads *must not* try to write into the same
|
|
|
|
+ // memory location. In Spiral, each polynomial created from the
|
|
|
|
+ // database ends up scattered into noncontiguous words of memory,
|
|
|
|
+ // but any one word still only comes from one polynomial. So with
|
|
|
|
+ // this mechanism, different threads can read different parts of the
|
|
|
|
+ // database to produce different polynomials, and write those
|
|
|
|
+ // polynomials into the same memory pool (but *not* the same memory
|
|
|
|
+ // locations) at the same time.
|
|
|
|
+
|
|
|
|
+ let vptrusize = unsafe { v.as_mut_ptr() as usize };
|
|
|
|
|
|
for instance in 0..instances {
|
|
for instance in 0..instances {
|
|
for trial in 0..trials {
|
|
for trial in 0..trials {
|
|
@@ -69,13 +79,12 @@ pub fn load_db_from_slice_mt(
|
|
let items_this_thread =
|
|
let items_this_thread =
|
|
items_per_thread_base + if thr < items_per_thread_extra { 1 } else { 0 };
|
|
items_per_thread_base + if thr < items_per_thread_extra { 1 } else { 0 };
|
|
let item_thread_end = item_thread_start + items_this_thread;
|
|
let item_thread_end = item_thread_start + items_this_thread;
|
|
- let v = &v;
|
|
|
|
s.spawn(move |_| {
|
|
s.spawn(move |_| {
|
|
- let vptr = unsafe { v.as_mut_ptr() };
|
|
|
|
|
|
+ let vptr = vptrusize as *mut u64;
|
|
for i in item_thread_start..item_thread_end {
|
|
for i in item_thread_start..item_thread_end {
|
|
// Swap the halves of the item index so that
|
|
// Swap the halves of the item index so that
|
|
// the polynomials based on the items are
|
|
// the polynomials based on the items are
|
|
- // written to the AlignedMemoryMT64 more
|
|
|
|
|
|
+ // written to the AlignedMemory64 more
|
|
// sequentially
|
|
// sequentially
|
|
let ii = i / dim0;
|
|
let ii = i / dim0;
|
|
let j = i % dim0;
|
|
let j = i % dim0;
|