Iterative benchadapt adapter that calls arrowbench benchmarks one at a time #115

Draft · wants to merge 2 commits into main
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -43,7 +43,7 @@ Suggests:
RcppSimdJson,
readr,
vroom
RoxygenNote: 7.1.2
RoxygenNote: 7.2.1
Roxygen: list(markdown = TRUE, load = "source")
Collate:
'benchmark.R'
2 changes: 1 addition & 1 deletion R/bm-read-file.R
@@ -8,7 +8,7 @@
#'
#' @export
read_file <- Benchmark("read_file",
setup = function(source = names(known_sources),
setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"),
# TODO: break out feather_v1 and feather_v2, feather_v2 only in >= 0.17
format = c("parquet", "feather"),
compression = c("uncompressed", "snappy", "lz4"),
2 changes: 1 addition & 1 deletion R/bm-write-file.R
@@ -8,7 +8,7 @@
#'
#' @export
write_file <- Benchmark("write_file",
setup = function(source = names(known_sources),
setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"),
format = c("parquet", "feather"),
compression = c("uncompressed", "snappy", "lz4"),
input = c("arrow_table", "data_frame")) {
52 changes: 43 additions & 9 deletions R/result.R
@@ -97,6 +97,9 @@ Serializable <- R6Point1Class(

active = list(
list = function() {
modifyList(self$list_serializable, private$not_to_serialize)
},
list_serializable = function() {
lapply(private$to_serialize, function(element) {
# recurse
if (inherits(element, "Serializable")) {
@@ -119,12 +122,20 @@

private = list(
to_serialize = list(),
not_to_serialize = list(),

get_or_set_serializable = function(variable, value) {
if (!missing(value)) {
private$to_serialize[[variable]] <- value
}
private$to_serialize[[variable]]
},

get_or_set_not_to_serialize = function(variable, value) {
if (!missing(value)) {
private$not_to_serialize[[variable]] <- value
}
private$not_to_serialize[[variable]]
}
),

@@ -164,15 +175,16 @@ BenchmarkResult <- R6Point1Class(

public = list(
initialize = function(name,
result,
params,
result = NULL,
params = NULL,
tags = NULL,
info = NULL,
context = NULL,
github = NULL,
options = NULL,
output = NULL,
rscript = NULL) {
rscript = NULL,
error = NULL) {
self$name <- name
self$result <- result
self$params <- params
@@ -183,6 +195,7 @@
self$options <- options
self$output <- output
self$rscript <- rscript
self$error <- error
},

to_dataframe = function(row.names = NULL, optional = FALSE, packages = "arrow", ...) {
@@ -213,20 +226,41 @@
}

out
},

to_publishable_json = function() {
res_list <- self$list_serializable

if (!is.null(res_list$result)) {
res_list[["stats"]] <- list(
data = list(res_list$result$real),
units = "s",
iterations = length(res_list$result$real),
times = list(),
times_unit = "s"
)
res_list$result <- NULL
}

res_list$tags$name <- res_list$name
res_list$name <- NULL

jsonlite::toJSON(res_list, auto_unbox = TRUE)
}
),

active = list(
name = function(name) private$get_or_set_serializable(variable = "name", value = name),
result = function(result) private$get_or_set_serializable(variable = "result", value = result),
params = function(params) private$get_or_set_serializable(variable = "params", value = params),
params = function(params) private$get_or_set_not_to_serialize(variable = "params", value = params),
tags = function(tags) private$get_or_set_serializable(variable = "tags", value = tags),
info = function(info) private$get_or_set_serializable(variable = "info", value = info),
context = function(context) private$get_or_set_serializable(variable = "context", value = context),
github = function(github) private$get_or_set_serializable(variable = "github", value = github),
options = function(options) private$get_or_set_serializable(variable = "options", value = options),
output = function(output) private$get_or_set_serializable(variable = "output", value = output),
rscript = function(rscript) private$get_or_set_serializable(variable = "rscript", value = rscript),
options = function(options) private$get_or_set_not_to_serialize(variable = "options", value = options),
output = function(output) private$get_or_set_not_to_serialize(variable = "output", value = output),
rscript = function(rscript) private$get_or_set_not_to_serialize(variable = "rscript", value = rscript),
error = function(error) private$get_or_set_serializable(variable = "error", value = error),

params_summary = function() {
d <- self$params
@@ -280,7 +314,7 @@ BenchmarkFailure <- R6Point1Class(
# A class for holding a set of benchmark results
#
# This class is primarily a list of `BenchmarkResult` instances, one for each
# combination of arguments for the benchmark's parameters. The list is accessible
# combination of arguments for the benchmark's parameters. The list is accessible
# via the `$results` active binding.
#
# An instance can be passed to `as.data.frame()` and `get_params_summary()`, the
@@ -299,7 +333,7 @@ BenchmarkResults <- R6Point1Class(
},
to_dataframe = function(row.names = NULL, optional = FALSE, ...) {
x <- self$results
valid <- purrr::map_lgl(x, ~inherits(.x, "BenchmarkResult")) # failures will be BenchmarkFailure
valid <- purrr::map_lgl(x, ~!is.null(.x$result))

dplyr::bind_rows(lapply(x[valid], function(res) res$to_dataframe(...)))
}
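
Note on the serialization split: `params`, `options`, `output`, and `rscript` are now routed through `not_to_serialize`, so they stay available on `$list` but are dropped from `$list_serializable` and thus from the published payload. A minimal sketch of how the new `to_publishable_json()` might be called, with hypothetical values (the `result` shape and tags here are illustrative, not from a real run):

res <- BenchmarkResult$new(
  name = "read_file",
  result = data.frame(real = c(1.21, 1.19)),  # per-iteration wall times, in seconds
  tags = list(source = "fanniemae_2016Q4", format = "parquet"),
  rscript = "library(arrowbench); ..."        # kept on the object, not published
)
# `result$real` is folded into a `stats` list (data, units, iterations, times,
# times_unit) and `name` is moved under `tags` before serializing to JSON:
res$to_publishable_json()
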
61 changes: 61 additions & 0 deletions inst/arrowbench
@@ -0,0 +1,61 @@
#!/usr/bin/env Rscript
library(arrowbench)


args <- commandArgs(trailingOnly = TRUE)

benchmark_list <- list(
read_file,
write_file
)
names(benchmark_list) <- vapply(benchmark_list, function(x) x$name, character(1))

benchmark_command_json <- benchmark_list |>
purrr::imap(~cbind(data.frame(bm = .y), arrowbench:::default_params(.x))) |>
lapply(function(x) split(x, seq(nrow(x)))) |>
lapply(unname) |>
purrr::flatten() |>
lapply(as.list) |>
jsonlite::toJSON(auto_unbox = TRUE)


switch (args[[1]],
"help" = if (length(args) == 1) {
cat(
"List and run arrowbench benchmarks",
"",
"Commands:",
" help [run|list]",
" list",
" run BENCHMARK [OPTIONS]",
sep = "\n"
)
} else if (length(args) >= 2 && args[[2]] == "list") {
cat(
"List available benchmarks in a JSON list.",
"",
"Usage:",
" arrowbench list",
sep = "\n"
)
} else if (length(args) >= 2 && args[[2]] == "run") {
cat(
"Run a benchmark.",
"",
"Usage:",
" arrowbench run BENCHMARK [OPTIONS]",
"",
"Example:",
" arrowbench run read_file n_iter=2",
sep = "\n"
)
} else {
cat("Help topic not found", sep = "\n")
},
"list" = cat(benchmark_command_json),
"run" = {
arg_list <- jsonlite::fromJSON(args[[2]])
arg_list$bm <- parse(text = arg_list$bm)[[1]]
cat(suppressWarnings(do.call(run_one, arg_list)$to_publishable_json()))
}
)
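
A sketch of the intended round trip, in R, assuming the script is invoked from the package root as `inst/arrowbench` (the exact parameter columns come from `default_params()` and differ by benchmark):

cmds <- jsonlite::fromJSON(system("inst/arrowbench list", intern = TRUE),
                           simplifyVector = FALSE)
length(cmds)  # one entry per benchmark/parameter combination
one <- jsonlite::toJSON(cmds[[1]], auto_unbox = TRUE)
# hand a single combination back to the CLI and capture its publishable JSON
system(paste0("inst/arrowbench run '", one, "'"), intern = TRUE)
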
68 changes: 68 additions & 0 deletions inst/arrowbench-adapter.py
@@ -0,0 +1,68 @@
import json
import subprocess
from pathlib import Path
from typing import Any, Dict, Generator, List

from benchadapt import BenchmarkResult
from benchadapt.adapters import GeneratorAdapter
from benchadapt.log import log


class ArrowbenchAdapter(GeneratorAdapter):
"""
An adapter for running arrowbench benchmarks
"""

def __init__(
self,
arrowbench_executable: str,
result_fields_override: Dict[str, Any] = None,
result_fields_append: Dict[str, Any] = None,
) -> None:
self.arrowbench = arrowbench_executable

super().__init__(
generator=self.run_arrowbench,
result_fields_override=result_fields_override,
result_fields_append=result_fields_append,
)

def list_benchmarks(self) -> List[Dict[str, Any]]:
"""
Get list of benchmark commands from arrowbench CLI

Returns
-------
A list of dicts that can be passed to `arrowbench run`
"""
res = subprocess.run(f"{self.arrowbench} list", shell=True, capture_output=True)
return json.loads(res.stdout.decode())

def run_arrowbench(self) -> Generator[BenchmarkResult, None, None]:
"""
        A generator that uses the arrowbench CLI to list available benchmarks,
        then iterates through the list, running each one and yielding its result.
"""
benchmarks = self.list_benchmarks()
# subset for demo purposes:
benchmarks = benchmarks[:10]
for benchmark in benchmarks:
command = f"{self.arrowbench} run '{json.dumps(benchmark)}'"
log.info(f"Running `{command}`")
res = subprocess.run(
command,
shell=True,
capture_output=True,
)
dict_result = json.loads(res.stdout.decode())
result = BenchmarkResult(**dict_result)
yield result


if __name__ == "__main__":
adapter = ArrowbenchAdapter(
arrowbench_executable=Path(__file__).resolve().parent / "arrowbench",
result_fields_override={"run_reason": "test"},
)
for result in adapter.run():
print(result)
6 changes: 4 additions & 2 deletions man/Benchmark.Rd

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion man/R6Point1Class.Rd

Some generated files are not rendered by default.

4 changes: 1 addition & 3 deletions tests/testthat/test-run.R
@@ -194,9 +194,7 @@ test_that("an rscript is added to the results object", {
res <- run_benchmark(placebo, cpu_count = 10, duration = 0.1)
res_path <- test_path("results/placebo/10-0.1-TRUE.json")
expect_true(file.exists(res_path))

res <- read_json(res_path)
expect_true("rscript" %in% names(res))
expect_true(!is.null(res$results[[1]]$rscript))
})

wipe_results()