Iterative benchadapt adapter that calls arrowbench benchmarks one at a time #115

Draft · wants to merge 2 commits into main
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -43,7 +43,7 @@ Suggests:
RcppSimdJson,
readr,
vroom
RoxygenNote: 7.1.2
RoxygenNote: 7.2.1
Roxygen: list(markdown = TRUE, load = "source")
Collate:
'benchmark.R'
2 changes: 1 addition & 1 deletion R/bm-read-file.R
@@ -8,7 +8,7 @@
#'
#' @export
read_file <- Benchmark("read_file",
setup = function(source = names(known_sources),
setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"),
# TODO: break out feather_v1 and feather_v2, feather_v2 only in >= 0.17
format = c("parquet", "feather"),
compression = c("uncompressed", "snappy", "lz4"),
2 changes: 1 addition & 1 deletion R/bm-write-file.R
@@ -8,7 +8,7 @@
#'
#' @export
write_file <- Benchmark("write_file",
setup = function(source = names(known_sources),
setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"),
format = c("parquet", "feather"),
compression = c("uncompressed", "snappy", "lz4"),
input = c("arrow_table", "data_frame")) {
52 changes: 43 additions & 9 deletions R/result.R
@@ -97,6 +97,9 @@ Serializable <- R6Point1Class(

active = list(
list = function() {
modifyList(self$list_serializable, private$not_to_serialize)
},
list_serializable = function() {
lapply(private$to_serialize, function(element) {
# recurse
if (inherits(element, "Serializable")) {
@@ -119,12 +122,20 @@

private = list(
to_serialize = list(),
not_to_serialize = list(),

get_or_set_serializable = function(variable, value) {
if (!missing(value)) {
private$to_serialize[[variable]] <- value
}
private$to_serialize[[variable]]
},

get_or_set_not_to_serialize = function(variable, value) {
if (!missing(value)) {
private$not_to_serialize[[variable]] <- value
}
private$not_to_serialize[[variable]]
}
),

@@ -164,15 +175,16 @@ BenchmarkResult <- R6Point1Class(

public = list(
initialize = function(name,
result,
params,
result = NULL,
params = NULL,
tags = NULL,
info = NULL,
context = NULL,
github = NULL,
options = NULL,
output = NULL,
rscript = NULL) {
rscript = NULL,
error = NULL) {
self$name <- name
self$result <- result
self$params <- params
@@ -183,6 +195,7 @@
self$options <- options
self$output <- output
self$rscript <- rscript
self$error <- error
},

to_dataframe = function(row.names = NULL, optional = FALSE, packages = "arrow", ...) {
@@ -213,20 +226,41 @@
}

out
},

to_publishable_json = function() {
res_list <- self$list_serializable

if (!is.null(res_list$result)) {
res_list[["stats"]] <- list(
data = list(res_list$result$real),
units = "s",
iterations = length(res_list$result$real),
times = list(),
times_unit = "s"
)
res_list$result <- NULL
}

res_list$tags$name <- res_list$name
res_list$name <- NULL

jsonlite::toJSON(res_list, auto_unbox = TRUE)
}
),

active = list(
name = function(name) private$get_or_set_serializable(variable = "name", value = name),
result = function(result) private$get_or_set_serializable(variable = "result", value = result),
params = function(params) private$get_or_set_serializable(variable = "params", value = params),
params = function(params) private$get_or_set_not_to_serialize(variable = "params", value = params),
tags = function(tags) private$get_or_set_serializable(variable = "tags", value = tags),
info = function(info) private$get_or_set_serializable(variable = "info", value = info),
context = function(context) private$get_or_set_serializable(variable = "context", value = context),
github = function(github) private$get_or_set_serializable(variable = "github", value = github),
options = function(options) private$get_or_set_serializable(variable = "options", value = options),
output = function(output) private$get_or_set_serializable(variable = "output", value = output),
rscript = function(rscript) private$get_or_set_serializable(variable = "rscript", value = rscript),
options = function(options) private$get_or_set_not_to_serialize(variable = "options", value = options),
output = function(output) private$get_or_set_not_to_serialize(variable = "output", value = output),
rscript = function(rscript) private$get_or_set_not_to_serialize(variable = "rscript", value = rscript),
error = function(error) private$get_or_set_serializable(variable = "error", value = error),

params_summary = function() {
d <- self$params
@@ -280,7 +314,7 @@ BenchmarkFailure <- R6Point1Class(
# A class for holding a set of benchmark results
#
# This class is primarily a list of `BenchmarkResult` instances, one for each
# combination of arguments for the benchmark's parameters. The list is accessible
# combination of arguments for the benchmark's parameters. The list is accessible
# via the `$results` active binding.
#
# An instance can be passed to `as.data.frame()` and `get_params_summary()`, the
@@ -299,7 +333,7 @@ BenchmarkResults <- R6Point1Class(
},
to_dataframe = function(row.names = NULL, optional = FALSE, ...) {
x <- self$results
valid <- purrr::map_lgl(x, ~inherits(.x, "BenchmarkResult")) # failures will be BenchmarkFailure
valid <- purrr::map_lgl(x, ~!is.null(.x$result))

dplyr::bind_rows(lapply(x[valid], function(res) res$to_dataframe(...)))
}
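
Note on the serialization split: `params`, `options`, `output`, and `rscript` are now routed through `not_to_serialize`, so they stay available on `$list` but are dropped from `$list_serializable` and thus from the published payload. A minimal sketch of how the new `to_publishable_json()` might be called, with hypothetical values (the `result` shape and tags here are illustrative, not from a real run):

res <- BenchmarkResult$new(
  name = "read_file",
  result = data.frame(real = c(1.21, 1.19)),  # per-iteration wall times, in seconds
  tags = list(source = "fanniemae_2016Q4", format = "parquet"),
  rscript = "library(arrowbench); ..."        # kept on the object, not published
)
# `result$real` is folded into a `stats` list (data, units, iterations, times,
# times_unit) and `name` is moved under `tags` before serializing to JSON:
res$to_publishable_json()
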
61 changes: 61 additions & 0 deletions inst/arrowbench
@@ -0,0 +1,61 @@
#!/usr/bin/env Rscript
library(arrowbench)


args <- commandArgs(trailingOnly = TRUE)

benchmark_list <- list(
read_file,
write_file
)
names(benchmark_list) <- vapply(benchmark_list, function(x) x$name, character(1))

benchmark_command_json <- benchmark_list |>
purrr::imap(~cbind(data.frame(bm = .y), arrowbench:::default_params(.x))) |>
lapply(function(x) split(x, seq(nrow(x)))) |>
lapply(unname) |>
purrr::flatten() |>
lapply(as.list) |>
jsonlite::toJSON(auto_unbox = TRUE)


switch (args[[1]],
"help" = if (length(args) == 1) {
cat(
"List and run arrowbench benchmarks",
"",
"Commands:",
" help [run|list]",
" list",
" run BENCHMARK [OPTIONS]",
sep = "\n"
)
} else if (length(args) >= 2 && args[[2]] == "list") {
cat(
"List available benchmarks in a JSON list.",
"",
"Usage:",
" arrowbench list",
sep = "\n"
)
} else if (length(args) >= 2 && args[[2]] == "run") {
cat(
"Run a benchmark.",
"",
"Usage:",
" arrowbench run BENCHMARK [OPTIONS]",
"",
"Example:",
" arrowbench run read_file n_iter=2",
sep = "\n"
)
} else {
cat("Help topic not found", sep = "\n")
},
"list" = cat(benchmark_command_json),
"run" = {
arg_list <- jsonlite::fromJSON(args[[2]])
arg_list$bm <- parse(text = arg_list$bm)[[1]]
cat(suppressWarnings(do.call(run_one, arg_list)$to_publishable_json()))
}
)
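
A sketch of the intended round trip, in R, assuming the script is invoked from the package root as `inst/arrowbench` (the exact parameter columns come from `default_params()` and differ by benchmark):

cmds <- jsonlite::fromJSON(system("inst/arrowbench list", intern = TRUE),
                           simplifyVector = FALSE)
length(cmds)  # one entry per benchmark/parameter combination
one <- jsonlite::toJSON(cmds[[1]], auto_unbox = TRUE)
# hand a single combination back to the CLI and capture its publishable JSON
system(paste0("inst/arrowbench run '", one, "'"), intern = TRUE)
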
68 changes: 68 additions & 0 deletions inst/arrowbench-adapter.py
@@ -0,0 +1,68 @@
import json
import subprocess
from pathlib import Path
from typing import Any, Dict, Generator, List

from benchadapt import BenchmarkResult
from benchadapt.adapters import GeneratorAdapter
from benchadapt.log import log


class ArrowbenchAdapter(GeneratorAdapter):
"""
An adapter for running arrowbench benchmarks
"""

def __init__(
self,
arrowbench_executable: str,
result_fields_override: Dict[str, Any] = None,
result_fields_append: Dict[str, Any] = None,
) -> None:
self.arrowbench = arrowbench_executable

super().__init__(
generator=self.run_arrowbench,
result_fields_override=result_fields_override,
result_fields_append=result_fields_append,
)

def list_benchmarks(self) -> List[Dict[str, Any]]:
"""
Get list of benchmark commands from arrowbench CLI

Returns
-------
A list of dicts that can be passed to `arrowbench run`
"""
res = subprocess.run(f"{self.arrowbench} list", shell=True, capture_output=True)
return json.loads(res.stdout.decode())

def run_arrowbench(self) -> Generator[BenchmarkResult, None, None]:
"""
        A generator that uses the arrowbench CLI to list available benchmarks,
        then iterates through the list, running each one and yielding its result.
"""
benchmarks = self.list_benchmarks()
# subset for demo purposes:
benchmarks = benchmarks[:10]
for benchmark in benchmarks:
command = f"{self.arrowbench} run '{json.dumps(benchmark)}'"
log.info(f"Running `{command}`")
res = subprocess.run(
command,
shell=True,
capture_output=True,
)
dict_result = json.loads(res.stdout.decode())
result = BenchmarkResult(**dict_result)
yield result


if __name__ == "__main__":
adapter = ArrowbenchAdapter(
arrowbench_executable=Path(__file__).resolve().parent / "arrowbench",
result_fields_override={"run_reason": "test"},
)
for result in adapter.run():
print(result)
6 changes: 4 additions & 2 deletions man/Benchmark.Rd

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion man/R6Point1Class.Rd

Some generated files are not rendered by default.

4 changes: 1 addition & 3 deletions tests/testthat/test-run.R
@@ -194,9 +194,7 @@ test_that("an rscript is added to the results object", {
res <- run_benchmark(placebo, cpu_count = 10, duration = 0.1)
res_path <- test_path("results/placebo/10-0.1-TRUE.json")
expect_true(file.exists(res_path))

res <- read_json(res_path)
expect_true("rscript" %in% names(res))
expect_true(!is.null(res$results[[1]]$rscript))
})

wipe_results()