Add a testing script + docs for running the workflow analysis (#201)

RMI-PACTA · Jul 19, 2024 · cd18e5c · cd18e5c
1 parent 89bf5a7
commit cd18e5c
Show file tree

Hide file tree

Showing 4 changed files with 124 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@
 /test_server.key
 /test_server.pub
 /.postgres-data
-/.vscode/**
+/.vscode/**
+/workflow-data
diff --git a/README.md b/README.md
@@ -28,6 +28,39 @@ npm run local
 
 This project is at a very early stage, expect things to change rapidly.
 
+## Testing the PACTA workflow
+
+To run the PACTA workflow code (e.g. [from this repo](https://github.com/RMI-PACTA/workflow.pacta.webapp)), first create the relevant directories:
+
+```bash
+# From the repo root
+mkdir workflow-data
+cd workflow-data
+
+mkdir -p analysis-output pacta-data real-estate score-card survey benchmarks portfolios report-output summary-output
+```
+
+And then load in the relevant files:
+
+* `pacta-data` - Should contain timestamped directories (one per quarterly data set, e.g. `2023Q4_20240424T120055Z`) that contain the actual data
+* `benchmarks` - Should contain timestamped directories containing pre-rendered result sets for comparison to outputs
+* `portfolios` - Should contain a single `default_portfolio.csv`; an example [can be seen here](https://github.com/RMI-PACTA/workflow.pacta.webapp/blob/e02e944b9e94f8af58a83a0210fb0737b9bb908d/tests/portfolios/default_portfolio.csv)
+
+Look at `scripts/run_workflow.sh` for more details. Once all the files are in the correct location, start a run with:
+
+```bash
+bazel run //scripts:run_workflow
+```
+
+You should see output like:
+
+```
+DEBUG [...] Checking configuration.
+INFO [...] Running PACTA
+INFO [...] Starting portfolio audit
+...
+```
+
 ## Security
 
 Please report security issues to security@siliconally.org, or by using one of

diff --git a/scripts/BUILD.bazel b/scripts/BUILD.bazel
@@ -8,6 +8,11 @@ sh_binary(
     srcs = ["run_keygen.sh"],
 )
 
+# Wraps run_workflow.sh, which starts the PACTA workflow container with the
+# directories under workflow-data/ mounted into it.
+sh_binary(
+    name = "run_workflow",
+    srcs = ["run_workflow.sh"],
+)
+
 sh_binary(
     name = "run_genjwt",
     srcs = ["run_genjwt.sh"],

diff --git a/scripts/run_workflow.sh b/scripts/run_workflow.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# run_workflow.sh is a script for testing out the analysis code from
+# https://github.com/RMI-PACTA/workflow.pacta.webapp
+#
+# It runs the nightly workflow image with the directories under
+# <repo root>/workflow-data bind-mounted into the container.
+#
+# Usage:
+#   bazel run //scripts:run_workflow            # uses the 2023 data set
+#   YEAR=2022 bazel run //scripts:run_workflow  # uses the 2022 data set
+
+set -euo pipefail
+
+# die prints its arguments to stderr and exits with status 1.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# BUILD_WORKSPACE_DIRECTORY is set by 'bazel run'. Fail with a clear message
+# instead of a bare "unbound variable" error when the script is run directly.
+ROOT="${BUILD_WORKSPACE_DIRECTORY:?must be set, run this script via bazel run //scripts:run_workflow}"
+cd "$ROOT"
+
+# Select the timestamped input directories and the base configuration to
+# inherit from. YEAR may be overridden from the environment; default is 2023.
+YEAR="${YEAR:-2023}"
+case "$YEAR" in
+  "2022")
+    PACTA_DATA_FOLDER="2022Q4_20240426T113151Z"
+    BENCHMARKS_FOLDER="2022Q4_20240529T002407Z"
+    INHERIT_BASE="GENERAL_2022Q4"
+    ;;
+  "2023")
+    PACTA_DATA_FOLDER="2023Q4_20240424T120055Z"
+    BENCHMARKS_FOLDER="2023Q4_20240529T002355Z"
+    INHERIT_BASE="GENERAL_2023Q4"
+    ;;
+  *)
+    die "unexpected year $YEAR"
+    ;;
+esac
+
+command -v docker >/dev/null || die "docker (or a podman docker shim) not found on PATH"
+
+# Detect podman's docker shim from its version string. The output is captured
+# rather than piped into 'grep -q': under 'pipefail' an early-exiting grep can
+# make the whole pipeline report failure if the producer receives SIGPIPE.
+IS_PODMAN=false
+if [[ "$(docker --version)" == *podman* ]]; then
+  IS_PODMAN=true
+  echo "Running against podman"
+fi
+
+declare -a DOCKER_FLAGS=(
+  # Read-only mounts and corresponding env vars
+  "-v" "$ROOT/workflow-data/pacta-data/$PACTA_DATA_FOLDER:/mnt/pacta-data:ro"
+  "-v" "$ROOT/workflow-data/benchmarks/$BENCHMARKS_FOLDER:/mnt/benchmarks:ro"
+  "-v" "$ROOT/workflow-data/portfolios:/mnt/portfolios:ro"
+  "-v" "$ROOT/workflow-data/real-estate:/mnt/real-estate:ro"
+  "-v" "$ROOT/workflow-data/score-card:/mnt/score-card:ro"
+  "-v" "$ROOT/workflow-data/survey:/mnt/survey:ro"
+  "-e" "BENCHMARKS_DIR=/mnt/benchmarks"
+  "-e" "PACTA_DATA_DIR=/mnt/pacta-data"
+  "-e" "PORTFOLIO_DIR=/mnt/portfolios"
+  "-e" "REAL_ESTATE_DIR=/mnt/real-estate"
+  "-e" "SCORE_CARD_DIR=/mnt/score-card"
+  "-e" "SURVEY_DIR=/mnt/survey"
+
+  # Write mounts and corresponding env vars
+  "-v" "$ROOT/workflow-data/analysis-output:/mnt/analysis-output"
+  "-v" "$ROOT/workflow-data/report-output:/mnt/report-output"
+  "-v" "$ROOT/workflow-data/summary-output:/mnt/summary-output"
+  "-e" "ANALYSIS_OUTPUT_DIR=/mnt/analysis-output"
+  "-e" "REPORT_OUTPUT_DIR=/mnt/report-output"
+  "-e" "SUMMARY_OUTPUT_DIR=/mnt/summary-output"
+
+  # Misc
+  "-e" "LOG_LEVEL=DEBUG"
+)
+
+# Fail early when a bind-mount source is missing. Docker would otherwise
+# create it as an empty directory on the host and the run would fail later
+# with a much less obvious error.
+prev_flag=""
+for flag in "${DOCKER_FLAGS[@]}"; do
+  # The host path is everything before the first ':' of a '-v' value.
+  if [[ "$prev_flag" == "-v" && ! -d "${flag%%:*}" ]]; then
+    die "missing directory ${flag%%:*} (see 'Testing the PACTA workflow' in README.md)"
+  fi
+  prev_flag="$flag"
+done
+
+# TODO: Unclear if this will work in Docker as is. For 'normal' root-running
+# Docker daemons, it should probably just create files/directories on the host
+# owned by 1000:1000, which is fine.
+if [[ "$IS_PODMAN" == true ]]; then
+  DOCKER_FLAGS+=("--userns" "keep-id:uid=1000,gid=1000")
+fi
+
+# Only ask for an interactive TTY when attached to a terminal: 'docker run -t'
+# refuses to start when stdin is not a TTY (e.g. in CI or when piped).
+declare -a RUN_FLAGS=("--rm")
+if [[ -t 0 && -t 1 ]]; then
+  RUN_FLAGS+=("-it")
+fi
+
+# Request passed to the workflow image as its single argument.
+# NOTE(review): holdingsDate is fixed at 2023-12-31 regardless of YEAR;
+# confirm whether it should follow the selected data set.
+JSON_INPUT="$(cat <<EOF
+{
+  "portfolio": {
+    "files": "default_portfolio.csv",
+    "holdingsDate": "2023-12-31",
+    "name": "FooPortfolio"
+  },
+  "inherit": "$INHERIT_BASE"
+}
+EOF
+)"
+
+docker run "${RUN_FLAGS[@]}" \
+  "${DOCKER_FLAGS[@]}" \
+  ghcr.io/rmi-pacta/workflow.pacta.webapp:nightly "$JSON_INPUT"