|
@@ -504,10 +504,18 @@ void ecall_routing_proceed(void *cbpointer)
|
|
|
|
|
|
// Obliviously tally the number of messages we received in
|
|
|
// round1 destined for each storage node
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ uint32_t inserted = round1.inserted;
|
|
|
+ unsigned long start_round2 = printf_with_rtclock("begin round2 processing (%u,%u)\n", inserted, round1.bufsize);
|
|
|
+ unsigned long start_tally = printf_with_rtclock("begin tally (%u)\n", inserted);
|
|
|
+#endif
|
|
|
uint32_t msg_size = g_teems_config.msg_size;
|
|
|
nodenum_t num_storage_nodes = g_teems_config.num_storage_nodes;
|
|
|
std::vector<uint32_t> tally = obliv_tally_stg(
|
|
|
round1.buf, msg_size, round1.inserted, num_storage_nodes);
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ printf_with_rtclock_diff(start_tally, "end tally (%u)\n", inserted);
|
|
|
+#endif
|
|
|
|
|
|
// Note: tally contains private values! It's OK to
|
|
|
// non-obliviously check for an error condition, though.
|
|
@@ -529,14 +537,27 @@ void ecall_routing_proceed(void *cbpointer)
|
|
|
|
|
|
// Obliviously add padding for each storage node according
|
|
|
// to the (private) padding tally.
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ unsigned long start_pad = printf_with_rtclock("begin pad (%u)\n", tot_padding);
|
|
|
+#endif
|
|
|
obliv_pad_stg(round1.buf + round1.inserted * msg_size,
|
|
|
msg_size, tally, tot_padding);
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ printf_with_rtclock_diff(start_pad, "end pad (%u)\n", tot_padding);
|
|
|
+#endif
|
|
|
|
|
|
round1.inserted += tot_padding;
|
|
|
|
|
|
// Obliviously shuffle the messages
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ unsigned long start_shuffle = printf_with_rtclock("begin shuffle (%u,%u)\n", round1.inserted, round1.bufsize);
|
|
|
+#endif
|
|
|
uint32_t num_shuffled = shuffle_mtobliv(g_teems_config.nthreads,
|
|
|
round1.buf, msg_size, round1.inserted, round1.bufsize);
|
|
|
+#ifdef PROFILE_ROUTING
|
|
|
+ printf_with_rtclock_diff(start_shuffle, "end shuffle (%u,%u)\n", round1.inserted, round1.bufsize);
|
|
|
+ printf_with_rtclock_diff(start_round2, "end round2 processing (%u,%u)\n", inserted, round1.bufsize);
|
|
|
+#endif
|
|
|
|
|
|
// Now we can handle the messages non-obliviously, since we
|
|
|
// know there will be exactly msgs_per_stg messages to each
|