From 355fabe56df527cc68688f179b1c8e25e3082308 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Tue, 12 Jul 2022 14:46:01 -0700
Subject: [PATCH 01/15] Make automated benchmarks easier.

---
 runner/collect_results.py |   4 +-
 runner/make-graphics.py   | 100 ++++++++++++++++++++++++++++++++++++++
 runner/run_benchmark.py   |   4 +-
 3 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100755 runner/make-graphics.py

diff --git a/runner/collect_results.py b/runner/collect_results.py
index b6c158c..8eac184 100755
--- a/runner/collect_results.py
+++ b/runner/collect_results.py
@@ -10,8 +10,8 @@
 import json
 
 
-def dir_path(string):
-    if os.path.isdir(string):
+def src_path_type(string):
+    if os.path.isdir(string) or string == "latest":
         return string
     else:
         raise NotADirectoryError(string)
diff --git a/runner/make-graphics.py b/runner/make-graphics.py
new file mode 100755
index 0000000..0e0cdac
--- /dev/null
+++ b/runner/make-graphics.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+
+from typing import List, Tuple
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib
+import numpy as np
+import argparse
+
+FONT = {"family": "serif", "size": 18}
+LARGE_FONT = 28
+
+STYLES = [
+    ("*", "magenta", "magenta"),
+    ("x", "cyan", "green"),
+    ("o", "yellow", "orange"),
+    ("v", "blue", "purple"),
+    ("+", "pink", "red"),
+    ("*", "brown", "brown"),
+    (".", "orange", "orange"),
+    ("x", "teal", "teal"),
+]
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("src_path")
+    parser.add_argument("out_path")
+    args = parser.parse_args()
+    df = load_df(args.src_path)
+    render(df, args.out_path)
+
+
+def load_df(src_path: str) -> pd.DataFrame:
+    df = pd.read_csv(src_path)
+    target = df.target.iloc[0].replace("lf-", "").upper()
+    df["runtime_version"] = (
+        [f"{target} {v}" for v in df.scheduler]
+        if "scheduler" in df.columns
+        else [target] * len(df.index)
+    )
+    return df
+
+
+def compute_legend(runtime_versions: str) -> List[Tuple[str, str, str, str]]:
+    assert len(STYLES) >= len(runtime_versions)
+    return [(a, *b) for a, b in zip(sorted(runtime_versions), STYLES)]
+
+
+def render(df: pd.DataFrame, out_path: str):
+    matplotlib.rc("font", **FONT)
+    fig, axes = plt.subplots(6, 4)
+    fig.set_size_inches(30, 45)
+    axes = axes.ravel()
+    x = sorted(list(df.threads.unique()))
+    df_numbers = df[np.isfinite(df.mean_time_ms)]
+    for ax, benchmark in zip(axes, sorted(list(df.benchmark.unique()))):
+        df_benchmark = df_numbers[df_numbers.benchmark == benchmark]
+        top = 1.3 * df_benchmark.mean_time_ms.max()
+        for version, marker, linecolor, markercolor in compute_legend(
+            df.runtime_version.unique()
+        ):
+            df_benchmark_scheduler = df_benchmark[
+                df_benchmark.runtime_version == version
+            ]
+            ax.set_title(benchmark)
+            ax.set_xticks(x)
+            ax.set_ylim(bottom=0, top=top)
+            (line,) = ax.plot(
+                x,
+                [
+                    df_benchmark_scheduler[
+                        df_benchmark_scheduler.threads == threads
+                    ].mean_time_ms.mean()
+                    for threads in x
+                ],
+                marker=marker,
+                ms=12,
+                linewidth=2,
+                c=linecolor,
+                markeredgecolor=markercolor,
+            )
+            line.set_label(version)
+        ax.legend()
+    ax = fig.add_subplot(111, frameon=False)
+    ax.xaxis.label.set_fontsize(LARGE_FONT)
+    ax.yaxis.label.set_fontsize(LARGE_FONT)
+    ax.title.set_fontsize(LARGE_FONT)
+    ax.set_facecolor("white")
+    plt.rc("font", size=LARGE_FONT)
+    plt.tick_params(labelcolor="none", top=False, bottom=False, left=False, right=False)
+    plt.title("Comparison of Scheduler Versions\n")
+    plt.xlabel("Number of Threads")
+    plt.ylabel("Mean Time (milliseconds)\n")
+    fig.patch.set_facecolor("white")
+    fig.savefig(out_path, transparent=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index 9f6894f..e17c8b9 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -72,7 +72,8 @@ def resolve_args(config_key):
     for step in ["prepare", "copy", "gen", "compile"]:
         if target[step] is not None:
             _, code = execute_command(target[step])
-            check_return_code(code, continue_on_error)
+            if not check_return_code(code, continue_on_error):
+                return
 
     # run the benchmark
     if target["run"] is not None:
@@ -107,6 +108,7 @@ def check_return_code(code, continue_on_error):
             raise RuntimeError(
                 f"Command returned with non-zero exit code ({code})"
             )
+    return code == 0
 
 def check_benchmark_target_config(benchmark, target_name):
     benchmark_name = benchmark["name"]

From d0916fdaa08fd988ca6cdc9f375fe77bf672f1d9 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Tue, 12 Jul 2022 15:06:40 -0700
Subject: [PATCH 02/15] Produce image URI.

---
 runner/make-graphics.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 0e0cdac..21f998e 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -6,6 +6,7 @@
 import matplotlib
 import numpy as np
 import argparse
+import base64
 
 FONT = {"family": "serif", "size": 18}
 LARGE_FONT = 28
@@ -26,9 +27,12 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("src_path")
     parser.add_argument("out_path")
+    parser.add_argument("--uri", dest="uri", action="store_true")
     args = parser.parse_args()
     df = load_df(args.src_path)
     render(df, args.out_path)
+    if args.uri:
+        print_uri(args.out_path)
 
 
 def load_df(src_path: str) -> pd.DataFrame:
@@ -96,5 +100,12 @@ def render(df: pd.DataFrame, out_path: str):
     fig.savefig(out_path, transparent=False)
 
 
+def print_uri(image_path: str):
+    print(
+        f"::set-output name=DATA_URI::data:image/png;base64,"
+        f"{base64.b64encode(open(image_path, 'rb').read())}"
+    )
+
+
 if __name__ == "__main__":
     main()

From 7821ae2b2ab863701d18ab3bae7a570e113dbe5f Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Tue, 12 Jul 2022 15:30:37 -0700
Subject: [PATCH 03/15] Add matplotlib dependency.

---
 runner/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/runner/requirements.txt b/runner/requirements.txt
index 67b180b..0f26ab5 100644
--- a/runner/requirements.txt
+++ b/runner/requirements.txt
@@ -1,3 +1,4 @@
 hydra-core>=1.2.0
 cogapp
+matplotlib
 pandas

From 725e8817921deaecd0c7df4ead8905cb3f587f99 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Tue, 12 Jul 2022 21:27:30 -0700
Subject: [PATCH 04/15] Do not produce image URI.

---
 runner/make-graphics.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 21f998e..d2f2192 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -6,7 +6,6 @@
 import matplotlib
 import numpy as np
 import argparse
-import base64
 
 FONT = {"family": "serif", "size": 18}
 LARGE_FONT = 28
@@ -31,8 +30,6 @@ def main():
     args = parser.parse_args()
     df = load_df(args.src_path)
     render(df, args.out_path)
-    if args.uri:
-        print_uri(args.out_path)
 
 
 def load_df(src_path: str) -> pd.DataFrame:
@@ -100,12 +97,5 @@ def render(df: pd.DataFrame, out_path: str):
     fig.savefig(out_path, transparent=False)
 
 
-def print_uri(image_path: str):
-    print(
-        f"::set-output name=DATA_URI::data:image/png;base64,"
-        f"{base64.b64encode(open(image_path, 'rb').read())}"
-    )
-
-
 if __name__ == "__main__":
     main()

From 849d4824f8b88fe5c73a7d9941d7b79d0a46244c Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Wed, 13 Jul 2022 23:23:47 -0700
Subject: [PATCH 05/15] Allow multiple source paths.

---
 runner/make-graphics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index d2f2192..ef9f29c 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -24,16 +24,16 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("src_path")
+    parser.add_argument("src_paths", nargs="+")
     parser.add_argument("out_path")
     parser.add_argument("--uri", dest="uri", action="store_true")
     args = parser.parse_args()
-    df = load_df(args.src_path)
+    df = load_df(args.src_paths)
     render(df, args.out_path)
 
 
-def load_df(src_path: str) -> pd.DataFrame:
-    df = pd.read_csv(src_path)
+def load_df(src_paths: List[str]) -> pd.DataFrame:
+    df = pd.concat([pd.read_csv(src_path) for src_path in src_paths])
     target = df.target.iloc[0].replace("lf-", "").upper()
     df["runtime_version"] = (
         [f"{target} {v}" for v in df.scheduler]

From b8527001cf4aa0a58e6f8d6a5c8494e4915675d2 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Thu, 14 Jul 2022 00:35:17 -0700
Subject: [PATCH 06/15] Make legend more precise in graph.

---
 runner/make-graphics.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index ef9f29c..167d6af 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -33,10 +33,14 @@ def main():
 
 
 def load_df(src_paths: List[str]) -> pd.DataFrame:
-    df = pd.concat([pd.read_csv(src_path) for src_path in src_paths])
+    dataframes = []
+    for src_path in src_paths:
+        dataframes.append(pd.read_csv(src_path))
+        dataframes[-1]["src_path"] = [src_path] * len(dataframes[-1].index)
+    df = pd.concat(dataframes)
     target = df.target.iloc[0].replace("lf-", "").upper()
     df["runtime_version"] = (
-        [f"{target} {v}" for v in df.scheduler]
+        [f"{target} {v} {src_path.split('.')[0].split('-')[-1]}" for v in df.scheduler]
         if "scheduler" in df.columns
         else [target] * len(df.index)
     )

From 600f68b4ce39a8e78aa0ba88ddb85fd64da52280 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Thu, 14 Jul 2022 00:49:10 -0700
Subject: [PATCH 07/15] Update make-graphics.py.

---
 runner/make-graphics.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 167d6af..26c9897 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -38,12 +38,17 @@ def load_df(src_paths: List[str]) -> pd.DataFrame:
         dataframes.append(pd.read_csv(src_path))
         dataframes[-1]["src_path"] = [src_path] * len(dataframes[-1].index)
     df = pd.concat(dataframes)
-    target = df.target.iloc[0].replace("lf-", "").upper()
-    df["runtime_version"] = (
-        [f"{target} {v} {src_path.split('.')[0].split('-')[-1]}" for v in df.scheduler]
-        if "scheduler" in df.columns
-        else [target] * len(df.index)
-    )
+    df["runtime_version"] = [
+        f"{target.replace('lf-', '').upper()} {scheduler}{src_path.split('.')[0].split('-')[-1]}"
+        for src_path, scheduler, target in zip(
+            df.src_path,
+            (
+                [ scheduler + " " for scheduler in df.scheduler ]
+                if "scheduler" in df.columns else [""] * len(df.index)
+            ),
+            df.target
+        )
+    ]
     return df
 
 

From e1b7b27bc26c696d51ac98b86091a677088ce1f1 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Fri, 15 Jul 2022 14:31:40 -0700
Subject: [PATCH 08/15] Make legend more readable.

---
 runner/make-graphics.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 26c9897..88e6e51 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -11,14 +11,14 @@
 LARGE_FONT = 28
 
 STYLES = [
-    ("*", "magenta", "magenta"),
-    ("x", "cyan", "green"),
     ("o", "yellow", "orange"),
-    ("v", "blue", "purple"),
+    ("x", "cyan", "green"),
     ("+", "pink", "red"),
+    ("x", "teal", "teal"),
+    ("*", "magenta", "magenta"),
+    ("v", "blue", "purple"),
     ("*", "brown", "brown"),
     (".", "orange", "orange"),
-    ("x", "teal", "teal"),
 ]
 
 
@@ -54,7 +54,7 @@ def load_df(src_paths: List[str]) -> pd.DataFrame:
 
 def compute_legend(runtime_versions: str) -> List[Tuple[str, str, str, str]]:
     assert len(STYLES) >= len(runtime_versions)
-    return [(a, *b) for a, b in zip(sorted(runtime_versions), STYLES)]
+    return [(a, *b) for a, b in zip(runtime_versions, STYLES)]
 
 
 def render(df: pd.DataFrame, out_path: str):

From f0e10de0c153993c1c8425ac6cac166dc680a693 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Fri, 15 Jul 2022 17:30:34 -0700
Subject: [PATCH 09/15] Respect the timeout parameter.

I was surprised to find out that the timeout was being ignored. I
suspect that I introduced that mistake while I was benchmarking the
C runtime and temporarily disabled the timeout because for C,
SortedLinkedList and friends were taking so very long.
---
 runner/run_benchmark.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index e17c8b9..d879063 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -77,11 +77,11 @@ def resolve_args(config_key):
 
     # run the benchmark
     if target["run"] is not None:
+        cmd = omegaconf.OmegaConf.to_object(target["run"])
         if test_mode:
             # run the command with a timeout of 1 second. We only want to test
             # if the command executes correctly, not if the full benchmark runs
             # correctly as this would take too long
-            cmd = omegaconf.OmegaConf.to_object(target["run"])
             _, code = execute_command(["timeout", "1"] + cmd)
             # timeout returns 124 if the command executed correctly but the
             # timeout was exceeded
@@ -90,7 +90,9 @@ def resolve_args(config_key):
                     f"Command returned with non-zero exit code ({code})"
                 )
         else:
-            output, code = execute_command(target["run"])
+            output, code = execute_command(["timeout", cfg["timeout"]] + cmd)
+            if code == 124:
+                log.error(f"The command \"{' '.join(cmd)}\" timed out.")
             check_return_code(code, continue_on_error)
             times = hydra.utils.call(target["parser"], output)
             write_results(times, cfg)

From 04273d2a6410b5d63276ed0bbfb4870275d91290 Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Sat, 16 Jul 2022 14:35:03 -0700
Subject: [PATCH 10/15] Include stack trace in timeout error message.

---
 runner/make-graphics.py |  6 ++++-
 runner/run_benchmark.py | 50 ++++++++++++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 88e6e51..11ba648 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -7,6 +7,8 @@
 import numpy as np
 import argparse
 
+DEFAULT_YLIM = 1000
+
 FONT = {"family": "serif", "size": 18}
 LARGE_FONT = 28
 
@@ -66,7 +68,9 @@ def render(df: pd.DataFrame, out_path: str):
     df_numbers = df[np.isfinite(df.mean_time_ms)]
     for ax, benchmark in zip(axes, sorted(list(df.benchmark.unique()))):
         df_benchmark = df_numbers[df_numbers.benchmark == benchmark]
-        top = 1.3 * df_benchmark.mean_time_ms.max()
+        top = 1.3 * df_benchmark[np.isfinite(df_benchmark.mean_time_ms)].mean_time_ms.max()
+        if pd.isna(top):
+            top = DEFAULT_YLIM
         for version, marker, linecolor, markercolor in compute_legend(
             df.runtime_version.unique()
         ):
diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index d879063..419e617 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -1,11 +1,16 @@
 #!/usr/bin/env python3
 
 import csv
+import os
+import signal
+import time
 import hydra
 import logging
 import multiprocessing
+import numpy as np
 import omegaconf
 import subprocess
+from getpass import getpass
 
 
 log = logging.getLogger("run_benchmark")
@@ -82,7 +87,7 @@ def resolve_args(config_key):
             # run the command with a timeout of 1 second. We only want to test
             # if the command executes correctly, not if the full benchmark runs
             # correctly as this would take too long
-            _, code = execute_command(["timeout", "1"] + cmd)
+            _, code = execute_command(["timeout", "1"] + cmd, 2)
             # timeout returns 124 if the command executed correctly but the
             # timeout was exceeded
             if code != 0 and code != 124:
@@ -90,11 +95,12 @@ def resolve_args(config_key):
                     f"Command returned with non-zero exit code ({code})"
                 )
         else:
-            output, code = execute_command(["timeout", cfg["timeout"]] + cmd)
+            output, code = execute_command(cmd, cfg["timeout"], cfg["passwordless_sudo"] if "passwordless_sudo" in cfg else False)
             if code == 124:
-                log.error(f"The command \"{' '.join(cmd)}\" timed out.")
+                log.error(f"The command \"{' '.join([str(word) for word in cmd])}\" timed out.")
             check_return_code(code, continue_on_error)
             times = hydra.utils.call(target["parser"], output)
+            times += [np.infty] * (cfg["iterations"] - len(times))
             write_results(times, cfg)
     else:
         raise ValueError(f"No run command provided for target {target_name}")
@@ -135,7 +141,7 @@ def check_benchmark_target_config(benchmark, target_name):
     return True
 
 
-def execute_command(command):
+def command_to_list(command):
     # the command can be a list of lists due to the way we use an omegaconf
     # resolver to determine the arguments. We need to flatten the command list
     # first. We also need to touch each element individually to make sure that
@@ -146,24 +152,48 @@ def execute_command(command):
             cmd.extend(i)
         else:
             cmd.append(str(i))
+    return cmd
 
+
+def execute_command(command, timeout=None, passwordless_sudo=False):
+    cmd = command_to_list(command)
     cmd_str = " ".join(cmd)
     log.info(f"run command: {cmd_str}")
-
     # run the command while printing and collecting its output
     output = []
     with subprocess.Popen(
         cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
     ) as process:
+        os.set_blocking(process.stdout.fileno(), False)
+        t0 = time.time()
         cmd_log = logging.getLogger(command[0])
-        while True:
+        poll = None
+        while poll is None:
             nextline = process.stdout.readline()
-            if nextline == "" and process.poll() is not None:
-                break
-            elif nextline != "":
+            while nextline != "":
                 output.append(nextline)
                 cmd_log.info(nextline.rstrip())
-
+                nextline = process.stdout.readline()
+            time.sleep(0.5)
+            poll = process.poll()
+            if timeout is not None and time.time() - t0 > timeout:
+                # There was probably a deadlock.
+                cmd_log.error(f"{cmd_str} timed out.")
+                completed_stacktrace = None
+                cmd_log.info("We may need to ask you for sudo access in order to get a stacktrace.")
+                completed_stacktrace = subprocess.run(
+                    ["sudo", "eu-stack", "-p", str(process.pid)],
+                    capture_output=True
+                )
+                process.kill()
+                if completed_stacktrace.returncode != 0:
+                    cmd_log.error("Failed to debug the timed-out process.")
+                for line in (
+                    completed_stacktrace.stdout.decode().splitlines()
+                    + completed_stacktrace.stderr.decode().splitlines()
+                ):
+                    cmd_log.error(line)
+                return (output, 124)
         code = process.returncode
 
     return output, code

From 76aeaeccdfc9868696e82b3cc7f0170c8d18f28d Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Mon, 18 Jul 2022 14:51:24 -0700
Subject: [PATCH 11/15] Repair subprocess I/O.

---
 runner/make-graphics.py | 10 +++---
 runner/run_benchmark.py | 72 +++++++++++++++++++++++------------------
 2 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/runner/make-graphics.py b/runner/make-graphics.py
index 11ba648..e78d008 100755
--- a/runner/make-graphics.py
+++ b/runner/make-graphics.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-from typing import List, Tuple
+from typing import Iterable, List, Tuple
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib
@@ -14,13 +14,13 @@
 
 STYLES = [
     ("o", "yellow", "orange"),
-    ("x", "cyan", "green"),
-    ("+", "pink", "red"),
+    ("*", "brown", "brown"),
     ("x", "teal", "teal"),
+    ("+", "pink", "red"),
     ("*", "magenta", "magenta"),
     ("v", "blue", "purple"),
-    ("*", "brown", "brown"),
     (".", "orange", "orange"),
+    ("x", "cyan", "green"),
 ]
 
 
@@ -54,7 +54,7 @@ def load_df(src_paths: List[str]) -> pd.DataFrame:
     return df
 
 
-def compute_legend(runtime_versions: str) -> List[Tuple[str, str, str, str]]:
+def compute_legend(runtime_versions: Iterable[str]) -> List[Tuple[str, str, str, str]]:
     assert len(STYLES) >= len(runtime_versions)
     return [(a, *b) for a, b in zip(runtime_versions, STYLES)]
 
diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index 419e617..cb5f47b 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -10,7 +10,8 @@
 import numpy as np
 import omegaconf
 import subprocess
-from getpass import getpass
+from queue import Empty, Queue
+from threading import Thread
 
 
 log = logging.getLogger("run_benchmark")
@@ -155,6 +156,15 @@ def command_to_list(command):
     return cmd
 
 
+def enqueue_output(out, queue):
+    while True:
+        line = out.readline()
+        queue.put(line)
+        if not line:
+            break
+    out.close()
+
+
 def execute_command(command, timeout=None, passwordless_sudo=False):
     cmd = command_to_list(command)
     cmd_str = " ".join(cmd)
@@ -162,39 +172,37 @@ def execute_command(command, timeout=None, passwordless_sudo=False):
     # run the command while printing and collecting its output
     output = []
     with subprocess.Popen(
-        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, text=True
     ) as process:
-        os.set_blocking(process.stdout.fileno(), False)
-        t0 = time.time()
+        q = Queue()
+        t = Thread(target=enqueue_output, args=(process.stdout, q))
+        t.daemon = True
+        t.start()
         cmd_log = logging.getLogger(command[0])
-        poll = None
-        while poll is None:
-            nextline = process.stdout.readline()
-            while nextline != "":
-                output.append(nextline)
-                cmd_log.info(nextline.rstrip())
-                nextline = process.stdout.readline()
-            time.sleep(0.5)
-            poll = process.poll()
-            if timeout is not None and time.time() - t0 > timeout:
-                # There was probably a deadlock.
-                cmd_log.error(f"{cmd_str} timed out.")
-                completed_stacktrace = None
-                cmd_log.info("We may need to ask you for sudo access in order to get a stacktrace.")
-                completed_stacktrace = subprocess.run(
-                    ["sudo", "eu-stack", "-p", str(process.pid)],
-                    capture_output=True
-                )
-                process.kill()
-                if completed_stacktrace.returncode != 0:
-                    cmd_log.error("Failed to debug the timed-out process.")
-                for line in (
-                    completed_stacktrace.stdout.decode().splitlines()
-                    + completed_stacktrace.stderr.decode().splitlines()
-                ):
-                    cmd_log.error(line)
-                return (output, 124)
-        code = process.returncode
+        try:
+            line = q.get(timeout=timeout)
+            while line:
+                line = q.get(timeout=timeout)
+                output.append(line)
+                cmd_log.info(line.rstrip())
+            code = process.wait(timeout=timeout)
+        except (Empty, subprocess.TimeoutExpired):
+            cmd_log.error(f"{cmd_str} timed out.")
+            completed_stacktrace = None
+            cmd_log.info("We may need to ask you for sudo access in order to get a stacktrace.")
+            completed_stacktrace = subprocess.run(
+                ["sudo", "eu-stack", "-p", str(process.pid)],
+                capture_output=True
+            )
+            process.kill()
+            if completed_stacktrace.returncode != 0:
+                cmd_log.error("Failed to debug the timed-out process.")
+            for line in (
+                completed_stacktrace.stdout.decode().splitlines()
+                + completed_stacktrace.stderr.decode().splitlines()
+            ):
+                cmd_log.error(line)
+            return (output, 124)
 
     return output, code
 

From 6a6d73eb105d2855cf624325a489107acdcfefad Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Mon, 18 Jul 2022 15:00:26 -0700
Subject: [PATCH 12/15] Disable stacktrace by default.

---
 runner/run_benchmark.py | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index cb5f47b..61430af 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python3
 
 import csv
-import os
-import signal
-import time
 import hydra
 import logging
 import multiprocessing
@@ -96,7 +93,11 @@ def resolve_args(config_key):
                     f"Command returned with non-zero exit code ({code})"
                 )
         else:
-            output, code = execute_command(cmd, cfg["timeout"], cfg["passwordless_sudo"] if "passwordless_sudo" in cfg else False)
+            output, code = execute_command(
+                cmd,
+                cfg["timeout"],
+                cfg["stacktrace"] if "stacktrace" in cfg else False
+            )
             if code == 124:
                 log.error(f"The command \"{' '.join([str(word) for word in cmd])}\" timed out.")
             check_return_code(code, continue_on_error)
@@ -165,7 +166,7 @@ def enqueue_output(out, queue):
     out.close()
 
 
-def execute_command(command, timeout=None, passwordless_sudo=False):
+def execute_command(command, timeout=None, stacktrace=False):
     cmd = command_to_list(command)
     cmd_str = " ".join(cmd)
     log.info(f"run command: {cmd_str}")
@@ -188,20 +189,21 @@ def execute_command(command, timeout=None, passwordless_sudo=False):
             code = process.wait(timeout=timeout)
         except (Empty, subprocess.TimeoutExpired):
             cmd_log.error(f"{cmd_str} timed out.")
-            completed_stacktrace = None
-            cmd_log.info("We may need to ask you for sudo access in order to get a stacktrace.")
-            completed_stacktrace = subprocess.run(
-                ["sudo", "eu-stack", "-p", str(process.pid)],
-                capture_output=True
-            )
-            process.kill()
-            if completed_stacktrace.returncode != 0:
-                cmd_log.error("Failed to debug the timed-out process.")
-            for line in (
-                completed_stacktrace.stdout.decode().splitlines()
-                + completed_stacktrace.stderr.decode().splitlines()
-            ):
-                cmd_log.error(line)
+            if stacktrace:
+                completed_stacktrace = None
+                cmd_log.info("We may need to ask you for sudo access in order to get a stacktrace.")
+                completed_stacktrace = subprocess.run(
+                    ["sudo", "eu-stack", "-p", str(process.pid)],
+                    capture_output=True
+                )
+                process.kill()
+                if completed_stacktrace.returncode != 0:
+                    cmd_log.error("Failed to debug the timed-out process.")
+                for line in (
+                    completed_stacktrace.stdout.decode().splitlines()
+                    + completed_stacktrace.stderr.decode().splitlines()
+                ):
+                    cmd_log.error(line)
             return (output, 124)
 
     return output, code

From c210de25f01b029f6a36a9e172902ef6ab00ba2a Mon Sep 17 00:00:00 2001
From: Peter Donovan <p.v.donovan4@gmail.com>
Date: Tue, 26 Jul 2022 00:34:31 -0700
Subject: [PATCH 13/15] Re-apply suppression of empty lines.

---
 runner/run_benchmark.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/runner/run_benchmark.py b/runner/run_benchmark.py
index 61430af..501c595 100755
--- a/runner/run_benchmark.py
+++ b/runner/run_benchmark.py
@@ -184,8 +184,9 @@ def execute_command(command, timeout=None, stacktrace=False):
             line = q.get(timeout=timeout)
             while line:
                 line = q.get(timeout=timeout)
-                output.append(line)
-                cmd_log.info(line.rstrip())
+                if line and not line.isspace():
+                    output.append(line)
+                    cmd_log.info(line.rstrip())
             code = process.wait(timeout=timeout)
         except (Empty, subprocess.TimeoutExpired):
             cmd_log.error(f"{cmd_str} timed out.")

From fee71c10fd866e0d494c31161c6a46958a4f6280 Mon Sep 17 00:00:00 2001
From: Peter Donovan <p.v.donovan4@gmail.com>
Date: Tue, 26 Jul 2022 08:59:31 -0700
Subject: [PATCH 14/15] Roll back unnecessary change.

---
 runner/collect_results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runner/collect_results.py b/runner/collect_results.py
index 8eac184..b6c158c 100755
--- a/runner/collect_results.py
+++ b/runner/collect_results.py
@@ -10,8 +10,8 @@
 import json
 
 
-def src_path_type(string):
-    if os.path.isdir(string) or string == "latest":
+def dir_path(string):
+    if os.path.isdir(string):
         return string
     else:
         raise NotADirectoryError(string)

From 6a6447d25b80aaf0320f0d42fa96da9c792f502d Mon Sep 17 00:00:00 2001
From: Peter Donovan <peterdonovan@berkeley.edu>
Date: Thu, 28 Jul 2022 16:32:32 -0700
Subject: [PATCH 15/15] Repair collect_results.py.

---
 runner/collect_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/collect_results.py b/runner/collect_results.py
index b6c158c..f39df8b 100755
--- a/runner/collect_results.py
+++ b/runner/collect_results.py
@@ -20,7 +20,7 @@ def dir_path(string):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("out_file")
-    parser.add_argument("src_path", required=False, type=dir_path)
+    parser.add_argument("src_path", nargs="?", default=None, type=dir_path)
     parser.add_argument("--raw", dest="raw", action="store_true")
     args = parser.parse_args()