|
@@ -71,9 +71,11 @@ bool route_init()
|
|
max_round2_msgs = max_round1_msgs;
|
|
max_round2_msgs = max_round1_msgs;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ // The max number of messages that can arrive at a storage server
|
|
|
|
+ uint32_t max_stg_msgs = tot_msg_per_stg + g_teems_config.tot_weight;
|
|
|
|
+
|
|
/*
|
|
/*
|
|
- printf("round1_msgs = %u, round2_msgs = %u\n",
|
|
|
|
- max_round1_msgs, max_round2_msgs);
|
|
|
|
|
|
+ printf("users_per_ing=%u, tot_msg_per_ing=%u, max_msg_from_each_ing=%u, max_round1_msgs=%u, users_per_stg=%u, tot_msg_per_stg=%u, max_msg_to_each_stg=%u, max_round2_msgs=%u, max_stg_msgs=%u\n", users_per_ing, tot_msg_per_ing, max_msg_from_each_ing, max_round1_msgs, users_per_stg, tot_msg_per_stg, max_msg_to_each_stg, max_round2_msgs, max_stg_msgs);
|
|
*/
|
|
*/
|
|
|
|
|
|
// Create the route state
|
|
// Create the route state
|
|
@@ -86,8 +88,10 @@ bool route_init()
|
|
route_state.round1.alloc(max_round2_msgs);
|
|
route_state.round1.alloc(max_round2_msgs);
|
|
}
|
|
}
|
|
if (my_roles & ROLE_STORAGE) {
|
|
if (my_roles & ROLE_STORAGE) {
|
|
- route_state.round2.alloc(tot_msg_per_stg +
|
|
|
|
- g_teems_config.tot_weight);
|
|
|
|
|
|
+ route_state.round2.alloc(max_stg_msgs);
|
|
|
|
+ if (!storage_init(users_per_stg, max_stg_msgs)) {
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
} catch (std::bad_alloc&) {
|
|
} catch (std::bad_alloc&) {
|
|
printf("Memory allocation failed in route_init\n");
|
|
printf("Memory allocation failed in route_init\n");
|
|
@@ -97,14 +101,22 @@ bool route_init()
|
|
route_state.tot_msg_per_ing = tot_msg_per_ing;
|
|
route_state.tot_msg_per_ing = tot_msg_per_ing;
|
|
route_state.max_msg_to_each_stg = max_msg_to_each_stg;
|
|
route_state.max_msg_to_each_stg = max_msg_to_each_stg;
|
|
route_state.max_round2_msgs = max_round2_msgs;
|
|
route_state.max_round2_msgs = max_round2_msgs;
|
|
|
|
+ route_state.max_stg_msgs = max_stg_msgs;
|
|
route_state.cbpointer = NULL;
|
|
route_state.cbpointer = NULL;
|
|
|
|
|
|
threadid_t nthreads = g_teems_config.nthreads;
|
|
threadid_t nthreads = g_teems_config.nthreads;
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
unsigned long start = printf_with_rtclock("begin precompute evalplans (%u,%hu) (%u,%hu)\n", tot_msg_per_ing, nthreads, max_round2_msgs, nthreads);
|
|
unsigned long start = printf_with_rtclock("begin precompute evalplans (%u,%hu) (%u,%hu)\n", tot_msg_per_ing, nthreads, max_round2_msgs, nthreads);
|
|
#endif
|
|
#endif
|
|
- sort_precompute_evalplan(tot_msg_per_ing, nthreads);
|
|
|
|
- sort_precompute_evalplan(max_round2_msgs, nthreads);
|
|
|
|
|
|
+ if (my_roles & ROLE_INGESTION) {
|
|
|
|
+ sort_precompute_evalplan(tot_msg_per_ing, nthreads);
|
|
|
|
+ }
|
|
|
|
+ if (my_roles & ROLE_ROUTING) {
|
|
|
|
+ sort_precompute_evalplan(max_round2_msgs, nthreads);
|
|
|
|
+ }
|
|
|
|
+ if (my_roles & ROLE_STORAGE) {
|
|
|
|
+ sort_precompute_evalplan(max_stg_msgs, nthreads);
|
|
|
|
+ }
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
printf_with_rtclock_diff(start, "end precompute evalplans\n");
|
|
printf_with_rtclock_diff(start, "end precompute evalplans\n");
|
|
#endif
|
|
#endif
|
|
@@ -112,38 +124,45 @@ bool route_init()
|
|
}
|
|
}
|
|
|
|
|
|
// Precompute the WaksmanNetworks needed for the sorts. If you pass -1,
|
|
// Precompute the WaksmanNetworks needed for the sorts. If you pass -1,
|
|
-// it will return the number of different sizes it needs. If you pass
|
|
|
|
-// [0,sizes-1], it will compute one WaksmanNetwork with that size index
|
|
|
|
-// and return the number of available WaksmanNetworks of that size.
|
|
|
|
|
|
+// it will return the number of different sizes it needs to regenerate.
|
|
|
|
+// If you pass [0,sizes-1], it will compute one WaksmanNetwork with that
|
|
|
|
+// size index and return the number of available WaksmanNetworks of that
|
|
|
|
+// size. If you pass anything else, it will return the number of
|
|
|
|
+// different sizes it needs at all.
|
|
|
|
+
|
|
|
|
+// The list of sizes that need refilling, updated when you pass -1
|
|
|
|
+static std::vector<uint32_t> used_sizes;
|
|
|
|
|
|
size_t ecall_precompute_sort(int sizeidx)
|
|
size_t ecall_precompute_sort(int sizeidx)
|
|
{
|
|
{
|
|
size_t ret = 0;
|
|
size_t ret = 0;
|
|
|
|
|
|
- switch(sizeidx) {
|
|
|
|
- case 0:
|
|
|
|
-#ifdef PROFILE_ROUTING
|
|
|
|
- {unsigned long start = printf_with_rtclock("begin precompute WaksmanNetwork (%u)\n", route_state.tot_msg_per_ing);
|
|
|
|
-#endif
|
|
|
|
- ret = sort_precompute(route_state.tot_msg_per_ing);
|
|
|
|
-#ifdef PROFILE_ROUTING
|
|
|
|
- printf_with_rtclock_diff(start, "end precompute Waksman Network (%u)\n", route_state.tot_msg_per_ing);}
|
|
|
|
-#endif
|
|
|
|
- break;
|
|
|
|
- case 1:
|
|
|
|
|
|
+ if (sizeidx == -1) {
|
|
|
|
+ used_sizes = sort_get_used();
|
|
|
|
+ ret = used_sizes.size();
|
|
|
|
+ } else if (sizeidx >= 0 && sizeidx < used_sizes.size()) {
|
|
|
|
+ uint32_t size = used_sizes[sizeidx];
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
- {unsigned long start = printf_with_rtclock("begin precompute WaksmanNetwork (%u)\n", route_state.max_round2_msgs);
|
|
|
|
|
|
+ unsigned long start = printf_with_rtclock("begin precompute WaksmanNetwork (%u)\n", size);
|
|
#endif
|
|
#endif
|
|
- ret = sort_precompute(route_state.max_round2_msgs);
|
|
|
|
|
|
+ ret = sort_precompute(size);
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
- printf_with_rtclock_diff(start, "end precompute Waksman Network (%u)\n", route_state.max_round2_msgs);}
|
|
|
|
|
|
+ printf_with_rtclock_diff(start, "end precompute Waksman Network (%u)\n", size);
|
|
#endif
|
|
#endif
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- ret = 2;
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
|
|
+ } else {
|
|
|
|
+ uint8_t my_roles = g_teems_config.roles[g_teems_config.my_node_num];
|
|
|
|
|
|
|
|
+ if (my_roles & ROLE_INGESTION) {
|
|
|
|
+ used_sizes.push_back(route_state.tot_msg_per_ing);
|
|
|
|
+ }
|
|
|
|
+ if (my_roles & ROLE_ROUTING) {
|
|
|
|
+ used_sizes.push_back(route_state.max_round2_msgs);
|
|
|
|
+ }
|
|
|
|
+ if (my_roles & ROLE_STORAGE) {
|
|
|
|
+ used_sizes.push_back(route_state.max_stg_msgs);
|
|
|
|
+ }
|
|
|
|
+ ret = used_sizes.size();
|
|
|
|
+ }
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -330,7 +349,7 @@ bool ecall_ingest_raw(uint8_t *msgs, uint32_t num_msgs)
|
|
}
|
|
}
|
|
|
|
|
|
// Send the round 1 messages. Note that N here is not private.
|
|
// Send the round 1 messages. Note that N here is not private.
|
|
-static void send_round1_msgs(const uint8_t *msgs, const uint64_t *indices,
|
|
|
|
|
|
+static void send_round1_msgs(const uint8_t *msgs, const UidKey *indices,
|
|
uint32_t N)
|
|
uint32_t N)
|
|
{
|
|
{
|
|
uint16_t msg_size = g_teems_config.msg_size;
|
|
uint16_t msg_size = g_teems_config.msg_size;
|
|
@@ -384,15 +403,15 @@ static void send_round1_msgs(const uint8_t *msgs, const uint64_t *indices,
|
|
uint8_t *buf = round1.buf + start * msg_size;
|
|
uint8_t *buf = round1.buf + start * msg_size;
|
|
|
|
|
|
for (uint32_t i=0; i<full_rows; ++i) {
|
|
for (uint32_t i=0; i<full_rows; ++i) {
|
|
- const uint64_t *idxp = indices + i*tot_weight + start_weight;
|
|
|
|
|
|
+ const UidKey *idxp = indices + i*tot_weight + start_weight;
|
|
for (uint32_t j=0; j<weight; ++j) {
|
|
for (uint32_t j=0; j<weight; ++j) {
|
|
- memmove(buf, msgs + idxp[j]*msg_size, msg_size);
|
|
|
|
|
|
+ memmove(buf, msgs + idxp[j].index()*msg_size, msg_size);
|
|
buf += msg_size;
|
|
buf += msg_size;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- const uint64_t *idxp = indices + full_rows*tot_weight + start_weight;
|
|
|
|
|
|
+ const UidKey *idxp = indices + full_rows*tot_weight + start_weight;
|
|
for (uint32_t j=0; j<num_msgs_last_row; ++j) {
|
|
for (uint32_t j=0; j<num_msgs_last_row; ++j) {
|
|
- memmove(buf, msgs + idxp[j]*msg_size, msg_size);
|
|
|
|
|
|
+ memmove(buf, msgs + idxp[j].index()*msg_size, msg_size);
|
|
buf += msg_size;
|
|
buf += msg_size;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -405,14 +424,14 @@ static void send_round1_msgs(const uint8_t *msgs, const uint64_t *indices,
|
|
NodeCommState &nodecom = g_commstates[routing_node];
|
|
NodeCommState &nodecom = g_commstates[routing_node];
|
|
nodecom.message_start(num_msgs * msg_size);
|
|
nodecom.message_start(num_msgs * msg_size);
|
|
for (uint32_t i=0; i<full_rows; ++i) {
|
|
for (uint32_t i=0; i<full_rows; ++i) {
|
|
- const uint64_t *idxp = indices + i*tot_weight + start_weight;
|
|
|
|
|
|
+ const UidKey *idxp = indices + i*tot_weight + start_weight;
|
|
for (uint32_t j=0; j<weight; ++j) {
|
|
for (uint32_t j=0; j<weight; ++j) {
|
|
- nodecom.message_data(msgs + idxp[j]*msg_size, msg_size);
|
|
|
|
|
|
+ nodecom.message_data(msgs + idxp[j].index()*msg_size, msg_size);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- const uint64_t *idxp = indices + full_rows*tot_weight + start_weight;
|
|
|
|
|
|
+ const UidKey *idxp = indices + full_rows*tot_weight + start_weight;
|
|
for (uint32_t j=0; j<num_msgs_last_row; ++j) {
|
|
for (uint32_t j=0; j<num_msgs_last_row; ++j) {
|
|
- nodecom.message_data(msgs + idxp[j]*msg_size, msg_size);
|
|
|
|
|
|
+ nodecom.message_data(msgs + idxp[j].index()*msg_size, msg_size);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -502,7 +521,7 @@ void ecall_routing_proceed(void *cbpointer)
|
|
unsigned long start_round1 = printf_with_rtclock("begin round1 processing (%u)\n", inserted);
|
|
unsigned long start_round1 = printf_with_rtclock("begin round1 processing (%u)\n", inserted);
|
|
unsigned long start_sort = printf_with_rtclock("begin oblivious sort (%u,%u)\n", inserted, route_state.tot_msg_per_ing);
|
|
unsigned long start_sort = printf_with_rtclock("begin oblivious sort (%u,%u)\n", inserted, route_state.tot_msg_per_ing);
|
|
#endif
|
|
#endif
|
|
- sort_mtobliv(g_teems_config.nthreads, ingbuf.buf,
|
|
|
|
|
|
+ sort_mtobliv<UidKey>(g_teems_config.nthreads, ingbuf.buf,
|
|
g_teems_config.msg_size, ingbuf.inserted,
|
|
g_teems_config.msg_size, ingbuf.inserted,
|
|
route_state.tot_msg_per_ing, send_round1_msgs);
|
|
route_state.tot_msg_per_ing, send_round1_msgs);
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
@@ -657,14 +676,11 @@ void ecall_routing_proceed(void *cbpointer)
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
unsigned long start = printf_with_rtclock("begin storage processing (%u)\n", round2.inserted);
|
|
unsigned long start = printf_with_rtclock("begin storage processing (%u)\n", round2.inserted);
|
|
#endif
|
|
#endif
|
|
- storage_received(round2.buf, round2.inserted);
|
|
|
|
|
|
+ storage_received(round2);
|
|
#ifdef PROFILE_ROUTING
|
|
#ifdef PROFILE_ROUTING
|
|
printf_with_rtclock_diff(start, "end storage processing (%u)\n", round2.inserted);
|
|
printf_with_rtclock_diff(start, "end storage processing (%u)\n", round2.inserted);
|
|
#endif
|
|
#endif
|
|
|
|
|
|
- round2.reset();
|
|
|
|
- pthread_mutex_unlock(&round2.mutex);
|
|
|
|
-
|
|
|
|
// We're done
|
|
// We're done
|
|
route_state.step = ROUTE_NOT_STARTED;
|
|
route_state.step = ROUTE_NOT_STARTED;
|
|
ocall_routing_round_complete(cbpointer, 0);
|
|
ocall_routing_round_complete(cbpointer, 0);
|