Parcourir la source

Extract data from the ramen outputs

Ian Goldberg il y a 6 mois
Parent
commit
ab179e9e74
2 fichiers modifiés avec 138 ajouts et 91 suppressions
  1. 0 91
      repro/extract_ramen_stats.py
  2. 138 0
      repro/parse_logs

+ 0 - 91
repro/extract_ramen_stats.py

@@ -1,91 +0,0 @@
-import sys
-import re
-
-def extract_init_time(file_contents):
-    lines = file_contents.split('\n')
-    init_time = None
-
-    for line in lines:
-        if line.startswith("InitTime:"):
-            init_time_str = line.split(":")[1].strip()
-            init_time = float(init_time_str.split()[0])
-            break
-
-    return init_time
-
-def extract_time_accesses(file_contents):
-    time_accesses_section = re.search(r'time accesses:\s+([\d.]+) ms', file_contents)
-    if time_accesses_section:
-        time_accesses = float(time_accesses_section.group(1))
-        return time_accesses
-    else:
-        return None
-
-def extract_max_bytes_sent(file_contents):
-    communication_section = re.search(r'communication accesses: {(.*?)}', file_contents, re.DOTALL)
-    if communication_section:
-        communication_stats = communication_section.group(1)
-        num_bytes_sent = [int(match.group(1)) for match in re.finditer(r'num_bytes_sent: (\d+)', communication_stats)]
-        max_bytes_sent = max(num_bytes_sent)
-        return max_bytes_sent
-    else:
-        return None
-
-def extract_max_time_accesses(file_contents):
-    max_time_accesses_section = re.search(r'Maximum time accesses: (\d+\.\d+) ms', file_contents)
-    if max_time_accesses_section:
-        max_time_accesses = float(max_time_accesses_section.group(1))
-        return max_time_accesses
-    else:
-        return None
-
-def extract_max_of_time_accesses(file_contents):
-    max_of_time_accesses_section = re.search(r'Maximum of time accesses: (\d+\.\d+) ms', file_contents)
-    if max_of_time_accesses_section:
-        max_of_time_accesses = float(max_of_time_accesses_section.group(1))
-        return max_of_time_accesses
-    else:
-        return None
-
-def main():
-    if len(sys.argv) != 2:
-        print("Usage: python script.py <filename>")
-        return
-
-    filename = sys.argv[1]
-    
-    try:
-        with open(filename, 'r') as file:
-            file_contents = file.read()
-            max_init_time = extract_init_time(file_contents)
-            time_accesses = extract_time_accesses(file_contents)
-            max_bytes_sent = extract_max_bytes_sent(file_contents)
-            max_time_accesses = extract_max_time_accesses(file_contents)
-            max_of_time_accesses = extract_max_of_time_accesses(file_contents)
-            
-           # if max_init_time is not None:
-           #     print("Maximum InitTime:", max_init_time, "ms")
-           # else:
-           #     print("No InitTime found in the file.")
-            
-           # if time_accesses is not None:
-           #     print("Time Accesses:", time_accesses, "ms")
-           # else:
-           #     print("No time accesses found in the file.")
-            
-            if max_bytes_sent is not None:
-                print(max_bytes_sent)
-            else:
-                print("No communication stats found in the file.")
-
-            if max_init_time is not None and time_accesses is not None:
-                total_time = max_init_time + time_accesses
-                print(total_time)
-            else:
-                print("Cannot calculate total time due to missing values.")
-    except FileNotFoundError:
-        print("File not found:", filename)
-
-if __name__ == "__main__":
-    main()
-

+ 138 - 0
repro/parse_logs

@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+# Parse the log output files of run-experiment
+# Send concatenated log files to stdin, or list them on the command
+# line.
+
+use strict;
+
+# Parser state.  $depth and $numops describe the run being read, $who is
+# the party (0, 1 or 2) whose output is being read, and the three arrays
+# hold one value per party for the current run.
+my ($mode, $netsetup) = ('', '');
+my ($depth, $numops, $who) = (0, 0, 0);
+my @initsecs = (0) x 3;     # InitTime per party, in seconds
+my @seconds = (0) x 3;      # "time accesses" per party, in seconds
+my @bytes = (0) x 3;        # total num_bytes_sent per party
+# Per-label accumulators, each entry of the form [n, sum, sum_squares]
+my (%ramen_s_data, %ramen_kib_data);
+
+# Read each log line from stdin or the files named on the command line
+# and act on the first pattern that matches; other lines are ignored.
+while (my $line = <>) {
+    chomp $line;
+    if ($line =~ /Network setup: (.*)/) {
+        # Kept with a trailing space; nothing in this section reads it
+        # back (the label below does not include it).
+        $netsetup = "$1 ";
+    } elsif ($line =~ /===== Running ramen (\d+) (\d+)/) {
+        # Start of a run: note its two parameters and clear the
+        # per-party measurements left over from the previous run.
+        ($depth, $numops) = ($1, $2);
+        @initsecs = (0) x 3;
+        @seconds = (0) x 3;
+        @bytes = (0) x 3;
+    } elsif ($line =~ /===== P([012]) output/) {
+        # The measurements that follow belong to this party.
+        $who = $1;
+    } elsif ($line =~ /^InitTime: (\d+\.?\d*) ms/) {
+        # Milliseconds to seconds
+        $initsecs[$who] = $1 / 1000;
+    } elsif ($line =~ /^time accesses:\s+(\d+\.?\d*) ms/) {
+        # Milliseconds to seconds
+        $seconds[$who] = $1 / 1000;
+    } elsif ($line =~ /num_bytes_sent: (\d+)/) {
+        # Added up, so repeated lines for one party are totalled.
+        $bytes[$who] += $1;
+    } elsif ($line =~ /===== End/) {
+        my $label = "Ramen read $depth $numops";
+        # Time for the run: the largest InitTime over the parties plus
+        # the largest access time over the parties.
+        my $slowest_init = max3(@initsecs);
+        my $slowest_access = max3(@seconds);
+        accum_data(\%ramen_s_data, $label,
+                   $slowest_init + $slowest_access);
+        # Communication for the run: the mean of the three per-party
+        # byte totals, expressed in KiB.
+        my $mean_bytes = ($bytes[0] + $bytes[1] + $bytes[2]) / 3;
+        my $kib = $mean_bytes / 1024;
+        accum_data(\%ramen_kib_data, $label, $kib);
+    }
+}
+
+# Reduce each accumulator table ([n, sum, sum_squares] per label) to a
+# table of [mean, variance] pairs, one table per measured quantity.
+my (%ramen_s_stats, %ramen_kib_stats);
+statsify(\%ramen_s_stats, \%ramen_s_data);
+statsify(\%ramen_kib_stats, \%ramen_kib_data);
+
+# Report the results: every timing line (unit "s") is printed first,
+# followed by every communication line (unit "KiB").
+output_stats(@$_)
+    for ([\%ramen_s_stats, "s"], [\%ramen_kib_stats, "KiB"]);
+
+# Subroutines
+
+# Return the numerically largest of the first three arguments.
+sub max3 {
+    my ($best, @rest) = @_[0 .. 2];
+    for my $candidate (@rest) { $best = $candidate if $candidate > $best }
+    return $best;
+}
+
+# Fold one observation into the running totals kept for a label.
+#   $dict - hash reference mapping labels to [n, sum, sum_squares]
+#   $key  - label whose totals are updated (created on first use)
+#   $data - the numeric observation to add
+# The totals are updated in place through the reference.
+sub accum_data {
+    my ($dict, $key, $data) = @_;
+    my $totals = ($dict->{$key} //= [0, 0, 0]);
+    $totals->[0]++;
+    $totals->[1] += $data;
+    $totals->[2] += $data * $data;
+}
+
+# Reduce accumulated totals to summary statistics.  $ddict maps each label
+# to [n, sum, sum_squares]; $sdict receives label => [mean, variance].
+# Zero samples give [undef, undef]; one sample gives [sample, undef];
+# otherwise the variance is the sample variance (divisor n - 1).
+sub statsify {
+    my ($sdict, $ddict) = @_;
+    foreach my $label (keys %$ddict) {
+        my $totals = $ddict->{$label};
+        my ($count, $total, $total_sq) = map { $totals->[$_] } 0 .. 2;
+        my ($mean, $variance);
+        if ($count == 1) {
+            $mean = $total;
+        } elsif ($count != 0) {
+            $mean = $total / $count;
+            $variance = ($total_sq - ($total * $total / $count)) / ($count - 1);
+        }
+        $sdict->{$label} = [$mean, $variance];
+    }
+}
+
+# Render a [mean, variance] pair for display.  With a defined variance
+# the result is "mean ± stddev"; with only a mean it is the mean itself;
+# with neither it is "none".
+sub statstr {
+    my ($stat) = @_;
+    my ($mean, $variance) = ($stat->[0], $stat->[1]);
+    unless (defined $variance) {
+        return defined $mean ? $mean : "none";
+    }
+    # A variance that is not positive is reported as a deviation of 0.
+    my $stddev = $variance > 0 ? sqrt($variance) : 0;
+    return "$mean ± $stddev";
+}
+
+# Print one line per label, in sorted label order, in the form
+# "<label> <statistic> <units>"; the statistic text comes from statstr.
+sub output_stats {
+    my ($dict, $units) = @_;
+    my @labels = sort keys %$dict;
+    for my $name (@labels) {
+        print $name, " ", statstr($dict->{$name}), " $units\n";
+    }
+}