#!/usr/bin/perl

# Parse the log output files of run-experiment
# Send concatenated log files to stdin, or list them on the command
# line.

use strict;

my $mode = '';
my $depth = 0;
my $numops = 0;
my $phase = '';
my $numparties = '';
my $what = '';
my $setupsize = 0;
my $opsize = 0;
my @seconds = (0, 0);
my $netsetup = '';
my %preproc_s_data = ();
my %preproc_kib_data = ();
my %online_s_data = ();
my %online_kib_data = ();

while(<>) {
    chomp;
    if (/Network setup: (.*)/) {
        $netsetup = "$1 ";
        next;
    }
    if (/===== Running duoram (\S+) (\d+) (\d+) (\S+) ([23]P)/) {
        $mode = $1;
        $depth = $2;
        $numops = $3;
        $phase = $4;
        $numparties = $5;
        my $label = "${numparties}Duoram $mode $depth $netsetup$numops";
        if ($mode eq "read" && $numparties eq "2P") {
            &parse_2P_read($label);
        } elsif ($mode eq "write" && $numparties eq "2P" && $phase eq "preproc") {
            &parse_2P_write_preproc($label);
        } elsif ($mode eq "write" && $numparties eq "2P" && $phase eq "online") {
            &parse_2P_write_online($label);
        } elsif ($numparties eq "3P" && $phase eq "preproc") {
            &parse_3P_preproc("$depth $netsetup$numops");
        } elsif ($numparties eq "3P" && $phase eq "online") {
            &parse_3P_online("$depth $netsetup$numops");
        }
        @seconds = (0,0);
        $what = '';
        $setupsize = 0;
        $opsize = 0;
        next;
    }
}

# Convert the data (in the form [n, sum, sum_squares]) to statistics (in
# the form [mean, variance])
my %preproc_s_stats = ();
my %preproc_kib_stats = ();
my %online_s_stats = ();
my %online_kib_stats = ();
&statsify(\%preproc_s_stats, \%preproc_s_data);
&statsify(\%preproc_kib_stats, \%preproc_kib_data);
&statsify(\%online_s_stats, \%online_s_data);
&statsify(\%online_kib_stats, \%online_kib_data);

# The 2P readwrite values are just the sums of the read and the write values
&sum_readwrite_2P(\%preproc_s_stats);
&sum_readwrite_2P(\%preproc_kib_stats);
&sum_readwrite_2P(\%online_s_stats);
&sum_readwrite_2P(\%online_kib_stats);

# The total values are the sums of the preproc and online values
my %total_s_stats = ();
my %total_kib_stats = ();
&sum_preproc_online(\%total_s_stats, \%preproc_s_stats, \%online_s_stats);
&sum_preproc_online(\%total_kib_stats, \%preproc_kib_stats, \%online_kib_stats);

# Output the data
&output_stats(\%preproc_s_stats, "Preprc", "s");
&output_stats(\%preproc_kib_stats, "Preprc", "KiB");
&output_stats(\%online_s_stats, "Onln", "s");
&output_stats(\%online_kib_stats, "Onln", "KiB");
&output_stats(\%total_s_stats, "Totl", "s");
&output_stats(\%total_kib_stats, "Totl", "KiB");

# Subroutines

sub parse_2P_read {
    my $label = $_[0];
    my $who = 0;
    my @preproc_s = ();
    my @preproc_kib = ();
    my @online_s = ();
    my @online_kib = ();
    while(<>) {
        last if /===== End/;
        # Try to recover from a malformed log
        last if /^Max GB:/;
        # It was too malformed
        die "Malformed log" if /===== Running/;
        if (/===== P([012]) output/) {
            $who = $1;
            next;
        }
        if (/Total preprocessing time: (\d+) µs/) {
            $preproc_s[$who] = $1 / 1000000;
        }
        if (/Total preprocessing bytes: (\d+)/) {
            $preproc_kib[$who] = $1 / 1024;
        }
        if (/Total query time: (\d+) µs/) {
            $online_s[$who] = $1 / 1000000;
        }
        if (/Total query bytes: (\d+)/) {
            $online_kib[$who] = $1 / 1024;
        }
    }
    return if $#preproc_s < 0;
    &accum_data(\%preproc_s_data, $label, &maxarray(@preproc_s));
    &accum_data(\%preproc_kib_data, $label, &avgarray(@preproc_kib));
    &accum_data(\%online_s_data, $label, &maxarray(@online_s));
    &accum_data(\%online_kib_data, $label, &avgarray(@online_kib));
}

sub parse_2P_write_preproc {
    my $label = $_[0];
    my $who = 0;
    my @preproc_s = ();
    my @preproc_kib = ();
    while(<>) {
        last if /===== End/;
        # Try to recover from a malformed log
        last if /^Max GB:/;
        # It was too malformed
        die "Malformed log" if /===== Running/;
        if (/===== P([012]) output/) {
            $who = $1;
            next;
        }
        if (/time to generate and evaluate \d+ OTs is: (\d+\.?\d*+)s/) {
            $preproc_s[$who] = $1;
        }
        if (/bytes transmitted for OT = (\d+) bytes/) {
            $preproc_kib[$who] = $1 / 1024;
        }
        if (/WallClockTime: (\d+\.?\d*) s/) {
            $preproc_s[$who] += $1;
        }
        if (/CommunicationCost: (\d+) bytes/) {
            $preproc_kib[$who] += $1 / 1024;
        }
    }
    return if $#preproc_s < 0;
    &accum_data(\%preproc_s_data, $label, &maxarray(@preproc_s));
    &accum_data(\%preproc_kib_data, $label, &avgarray(@preproc_kib));
}

sub parse_2P_write_online {
    my $label = $_[0];
    my $who = 0;
    my @online_s = ();
    my @online_kib = ();
    while(<>) {
        last if /===== End/;
        # Try to recover from a malformed log
        last if /^Max GB:/;
        # It was too malformed
        die "Malformed log" if /===== Running/;
        if (/===== P([012]) output/) {
            $who = $1;
            next;
        }
        if (/write_time = (\d+\.?\d*)/) {
            $online_s[$who] = $1;
        }
        if (/communication_cost_writes = (\d+) bytes/) {
            $online_kib[$who] = $1 / 1024;
        }
    }
    return if $#online_s < 0;
    &accum_data(\%online_s_data, $label, &maxarray(@online_s));
    &accum_data(\%online_kib_data, $label, &avgarray(@online_kib));
}

sub parse_3P_preproc {
    my $labelsuffix = $_[0];
    my $who = 0;
    my @preproc_s = ();
    my @preproc_kib = ();
    while(<>) {
        if (/===== P([012]) output/) {
            $who = $1;
            next;
        }
        last if /===== End/;
        # Try to recover from a malformed log
        last if /^Max GB:/;
        # It was too malformed
        die "Malformed log" if /===== Running/;
        if (/WallClockTime: (\d+\.?\d*)/) {
            $preproc_s[$who] = $1;
        }
        if (/CommunicationCost: (\d+) bytes/) {
            $preproc_kib[$who] = $1 / 1024;
        }
    }
    return if $#preproc_s < 0;
    &accum_data(\%preproc_s_data, "3PDuoram read $labelsuffix", &maxarray(@preproc_s));
    &accum_data(\%preproc_kib_data, "3PDuoram read $labelsuffix", &avgarray(@preproc_kib));
    &accum_data(\%preproc_s_data, "3PDuoram write $labelsuffix", &maxarray(@preproc_s));
    &accum_data(\%preproc_kib_data, "3PDuoram write $labelsuffix", &avgarray(@preproc_kib));
    &accum_data(\%preproc_s_data, "3PDuoram readwrite $labelsuffix", 2*&maxarray(@preproc_s));
    &accum_data(\%preproc_kib_data, "3PDuoram readwrite $labelsuffix", 2*&avgarray(@preproc_kib));
}

sub parse_3P_online {
    my $labelsuffix = $_[0];
    my $who = 0;
    my @read_s = ();
    my @read_kib = ();
    my @write_s = ();
    my @write_kib = ();
    my @readwrite_s = ();
    my @readwrite_kib = ();
    while(<>) {
        last if /===== End/;
        # Try to recover from a malformed log
        last if /^Max GB:/;
        # It was too malformed
        die "Malformed log" if /===== Running/;
        if (/===== P([012]) output/) {
            $who = $1;
            next;
        }
        if (/write_time =\s+(\d+\.?\d*)/) {
            $write_s[$who] = $1;
        }
        if (/communication_cost_writes = (\d+) bytes/) {
            $write_kib[$who] = $1 / 1024;
        }
        if (/dependent_read_time =\s+(\d+\.?\d*)/) {
            $read_s[$who] = $1;
        }
        if (/communication_cost_dep_read = (\d+) bytes/) {
            $read_kib[$who] = $1 / 1024;
        }
        if (/interleaved_time =\s+(\d+\.?\d*)/) {
            $readwrite_s[$who] = $1;
        }
        if (/communication_cost_interleaved = (\d+) bytes/) {
            $readwrite_kib[$who] = $1 / 1024;
        }
    }
    return if $#read_s < 0;
    &accum_data(\%online_s_data, "3PDuoram read $labelsuffix", &maxarray(@read_s));
    &accum_data(\%online_kib_data, "3PDuoram read $labelsuffix", &avgarray(@read_kib));
    &accum_data(\%online_s_data, "3PDuoram write $labelsuffix", &maxarray(@write_s));
    &accum_data(\%online_kib_data, "3PDuoram write $labelsuffix", &avgarray(@write_kib));
    &accum_data(\%online_s_data, "3PDuoram readwrite $labelsuffix", &maxarray(@readwrite_s));
    &accum_data(\%online_kib_data, "3PDuoram readwrite $labelsuffix", &avgarray(@readwrite_kib));
}

sub maxarray {
    my $max = $_[0];
    foreach (@_) {
        $max = $_ if $_ > $max;
    }
    $max;
}

sub avgarray {
    my $sum = 0;
    my $n = 0;
    foreach (@_) {
        $sum += $_;
        $n += 1;
    }
    $sum / $n;
}

# Pass:
# - a reference to a dictionary
# - the key into that dictionary
# - the new data point
# Data is stored in the dictionary as a triple (n, sum, sum_squares)
sub accum_data {
    my ($dict, $key, $data) = @_;
    $dict->{$key} = [0, 0, 0] unless defined $dict->{$key};
    $dict->{$key}->[0] += 1;
    $dict->{$key}->[1] += $data;
    $dict->{$key}->[2] += ($data * $data);
}

# Convert data (in the form [n, sum, sum_squares]) to statistics (in
# the form [mean, variance])
sub statsify {
    my ($sdict, $ddict) = @_;
    my $key;
    foreach $key (keys %$ddict) {
        my $data = $ddict->{$key};
        my $n = $data->[0];
        my $sum = $data->[1];
        my $sumsq = $data->[2];
        if ($n == 0) {
            $sdict->{$key} = [undef, undef];
        } elsif ($n == 1) {
            $sdict->{$key} = [$sum, undef];
        } else {
            $sdict->{$key} = [$sum/$n, ($sumsq - ($sum*$sum/$n))/($n-1)];
        }
    }
}

# Turn a stat array [mean, variance] into a string to display
sub statstr {
    my $data = $_[0];
    if (defined $data->[1]) {
        my $mean = $data->[0];
        my $stddev = $data->[1] > 0 ? sqrt($data->[1]) : 0;
        return "$mean ± $stddev";
    } elsif (defined $data->[0]) {
        return $data->[0];
    } else {
        return "none"
    }
}

# Sum two stat arrays
sub statsum {
    my ($data0, $data1) = @_;
    if (defined $data0->[1] && defined $data1->[1]) {
        return [$data0->[0] + $data1->[0], $data0->[1] + $data1->[1]];
    } else {
        return [$data0->[0] + $data1->[0], undef];
    }
}

# The 2P readwrite values are just the sums of the read and the write values
sub sum_readwrite_2P {
    my $dict = $_[0];
    my $key;
    foreach $key (keys %$dict) {
        if ($key =~ /^2PDuoram read /) {
            my $writekey = $key;
            my $readwritekey = $key;
            $writekey =~ s/^2PDuoram read /2PDuoram write /;
            $readwritekey =~ s/^2PDuoram read /2PDuoram readwrite /;
            if (defined $dict->{$writekey}) {
                $dict->{$readwritekey} = &statsum($dict->{$key}, $dict->{$writekey});
            } else {
                # It's OK if we're missing the data point, since we
                # don't necessary parse all the data at once.
                # print "Missing data point for $writekey\n";
            }
        }
    }
}

# Add the preproc and online stats to get the total stats
sub sum_preproc_online {
    my ($tdict, $pdict, $odict) = @_;
    my $key;
    foreach $key (keys %$pdict) {
        if (defined $odict->{$key}) {
            $tdict->{$key} = &statsum($pdict->{$key}, $odict->{$key});
        }
    }
}

# Output the stats in the given dictionary. Append $phase to the
# protocol name, and add $units to the end.
sub output_stats {
    my ($dict, $phase, $units) = @_;
    my $label;
    foreach $label (sort keys %$dict) {
        my $printlabel = $label;
        $printlabel =~ s/PDuoram/PDuoram$phase/;
        print $printlabel, " ", &statstr($dict->{$label}), " $units\n";
    }
}