-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding cache test WDL and GitHub Action #66
base: main
Are you sure you want to change the base?
Changes from all commits
8a9c7ae
89b5331
459177f
4666037
1914d57
cae2d06
41f824c
8de27fa
9781f4b
30aaad2
a2de077
9514874
9a4e8e9
ebdfbc1
54040b2
ab11b82
66011cc
1154365
5eb0f42
9824149
6642f25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
name: Validate Cromwell Caching | ||
|
||
on: | ||
pull_request: | ||
workflow_dispatch: | ||
|
||
jobs: | ||
validate-cromwell-cache: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
- name: Set up Java | ||
uses: actions/setup-java@v3 | ||
with: | ||
distribution: 'temurin' | ||
java-version: '11' | ||
|
||
- name: Download Cromwell | ||
run: | | ||
wget https://github.com/broadinstitute/cromwell/releases/download/86/cromwell-86.jar | ||
|
||
# Baseline execution: runs the workflow once, then records how long it took and which execution directory it produced. | ||
- name: First run | ||
id: first-run | ||
run: | | ||
echo "Running first execution..." | ||
began=$(date +%s) | ||
java -Dconfig.file=cacheTest/cromwell.conf -jar cromwell-86.jar run cacheTest/cacheTest.wdl -i cacheTest/inputs.json -o cacheTest/options.json | ||
finished=$(date +%s) | ||
# Wall-clock seconds for the whole Cromwell invocation, exported as a step output | ||
echo "first_duration=$((finished - began))" >> $GITHUB_OUTPUT | ||
|
||
# Locate the CacheTest directory under cromwell-executions; abort if none exists | ||
exec_root=$(find . -type d -name "CacheTest" -path "*/cromwell-executions/*" | sort -r | head -1) | ||
if [ -z "$exec_root" ]; then | ||
echo "::error::Could not find workflow execution directory" | ||
exit 1 | ||
fi | ||
|
||
# The name of the first subdirectory (reverse name order) is used as the workflow ID | ||
run_id=$(basename "$(find "$exec_root" -mindepth 1 -maxdepth 1 -type d | sort -r | head -1)") | ||
echo "First workflow ID: $run_id" | ||
echo "first_workflow_id=$run_id" >> $GITHUB_OUTPUT | ||
|
||
# Re-runs the workflow with the same inputs file as the first run; Cromwell's combined stdout/stderr is saved to cromwell.log. | ||
- name: Second run (should use cache) | ||
id: second-run | ||
run: | | ||
echo "Running second execution..." | ||
start_time=$(date +%s) | ||
# Output is redirected to a file, so print it on failure; otherwise the step aborts with nothing in the job log | ||
if ! java -Dconfig.file=cacheTest/cromwell.conf -jar cromwell-86.jar run cacheTest/cacheTest.wdl -i cacheTest/inputs.json -o cacheTest/options.json > cromwell.log 2>&1; then | ||
cat cromwell.log | ||
echo "::error::Second Cromwell run failed" | ||
exit 1 | ||
fi | ||
end_time=$(date +%s) | ||
duration=$((end_time - start_time)) | ||
echo "second_duration=$duration" >> $GITHUB_OUTPUT | ||
|
||
workflow_dir=$(find . -type d -name "CacheTest" -path "*/cromwell-executions/*" | sort -r | head -1) | ||
# Same guard the first-run step uses; without it an empty path only surfaces later as an obscure find/basename error | ||
if [ -z "$workflow_dir" ]; then | ||
echo "::error::Could not find workflow execution directory" | ||
exit 1 | ||
fi | ||
|
||
# Take the first run directory (reverse name order) that is not the first run's | ||
latest_runs=$(find "$workflow_dir" -mindepth 1 -maxdepth 1 -type d | sort -r) | ||
latest_run=$(echo "$latest_runs" | grep -v "${{ steps.first-run.outputs.first_workflow_id }}" | head -1) | ||
if [ -z "$latest_run" ]; then | ||
echo "::error::Could not find an execution directory for the second run" | ||
exit 1 | ||
fi | ||
workflow_id=$(basename "$latest_run") | ||
echo "Second workflow ID: $workflow_id" | ||
echo "second_workflow_id=$workflow_id" >> $GITHUB_OUTPUT | ||
|
||
- name: Create modified inputs | ||
run: | | ||
echo '{ | ||
"CacheTest.message": "Modified message to invalidate cache", | ||
"CacheTest.sleep_time": 19 | ||
}' > cacheTest/modified_inputs.json | ||
|
||
# Runs the workflow with cacheTest/modified_inputs.json; Cromwell's combined stdout/stderr is saved to cromwell_modified.log. | ||
- name: Third run (should NOT use cache) | ||
id: third-run | ||
run: | | ||
echo "Running third execution with modified inputs..." | ||
start_time=$(date +%s) | ||
# Output is redirected to a file, so print it on failure; otherwise the step aborts with nothing in the job log | ||
if ! java -Dconfig.file=cacheTest/cromwell.conf -jar cromwell-86.jar run cacheTest/cacheTest.wdl -i cacheTest/modified_inputs.json -o cacheTest/options.json > cromwell_modified.log 2>&1; then | ||
cat cromwell_modified.log | ||
echo "::error::Third Cromwell run failed" | ||
exit 1 | ||
fi | ||
end_time=$(date +%s) | ||
duration=$((end_time - start_time)) | ||
echo "third_duration=$duration" >> $GITHUB_OUTPUT | ||
|
||
workflow_dir=$(find . -type d -name "CacheTest" -path "*/cromwell-executions/*" | sort -r | head -1) | ||
# Same guard the first-run step uses; without it an empty path only surfaces later as an obscure find/basename error | ||
if [ -z "$workflow_dir" ]; then | ||
echo "::error::Could not find workflow execution directory" | ||
exit 1 | ||
fi | ||
|
||
# Take the first run directory (reverse name order) that belongs to neither earlier run | ||
latest_runs=$(find "$workflow_dir" -mindepth 1 -maxdepth 1 -type d | sort -r) | ||
latest_run=$(echo "$latest_runs" | grep -v "${{ steps.first-run.outputs.first_workflow_id }}" | grep -v "${{ steps.second-run.outputs.second_workflow_id }}" | head -1) | ||
if [ -z "$latest_run" ]; then | ||
echo "::error::Could not find an execution directory for the third run" | ||
exit 1 | ||
fi | ||
workflow_id=$(basename "$latest_run") | ||
echo "Third workflow ID: $workflow_id" | ||
echo "third_workflow_id=$workflow_id" >> $GITHUB_OUTPUT | ||
|
||
# Final gate: fails the job unless the three runs' recorded IDs, log contents and durations match the expected caching pattern. | ||
- name: Validate caching behavior | ||
run: | | ||
echo "First run duration: ${{ steps.first-run.outputs.first_duration }} seconds" | ||
echo "Second run duration: ${{ steps.second-run.outputs.second_duration }} seconds" | ||
echo "Third run duration: ${{ steps.third-run.outputs.third_duration }} seconds" | ||
|
||
# Verify all runs completed | ||
# (an empty ID means the corresponding step recorded no execution directory name) | ||
if [ -z "${{ steps.first-run.outputs.first_workflow_id }}" ] || \ | ||
[ -z "${{ steps.second-run.outputs.second_workflow_id }}" ] || \ | ||
[ -z "${{ steps.third-run.outputs.third_workflow_id }}" ]; then | ||
echo "::error::One or more workflow runs failed to complete" | ||
exit 1 | ||
fi | ||
|
||
# Check for cache hit in second run | ||
# cromwell.log is the second run's redirected stdout/stderr; the match is case-sensitive | ||
if ! grep -q "cache hit copying success" cromwell.log; then | ||
echo "::error::No cache hit message found in second run Cromwell logs" | ||
exit 1 | ||
fi | ||
|
||
# Check that third run did NOT use cache | ||
# cromwell_modified.log is the third run's redirected stdout/stderr | ||
if grep -q "cache hit copying success" cromwell_modified.log; then | ||
echo "::error::Cache hit found in third run when it should have been invalidated" | ||
exit 1 | ||
fi | ||
|
||
# Verify second run was significantly faster | ||
# Threshold: fail if the second run took more than half the first run's seconds (integer division) | ||
# NOTE(review): durations are whole-invocation wall-clock times, so they may vary with runner load - confirm the margin is adequate | ||
if [ ${{ steps.second-run.outputs.second_duration }} -gt $(( ${{ steps.first-run.outputs.first_duration }} / 2 )) ]; then | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In your experience thus far is the timing variable? That is, is this math safe enough that even with variable load on the clusters that this will succeed? Same comment for next if block |
||
echo "::error::Cache validation failed! Second run took too long, suggesting cache wasn't used" | ||
exit 1 | ||
fi | ||
|
||
# Verify third run was NOT cached (should take similar time to first run) | ||
# Threshold: fail if the third run took less than half the first run's seconds (integer division) | ||
if [ ${{ steps.third-run.outputs.third_duration }} -lt $(( ${{ steps.first-run.outputs.first_duration }} / 2 )) ]; then | ||
echo "::error::Third run was too fast, suggesting cache was incorrectly used" | ||
exit 1 | ||
fi | ||
|
||
echo "Cache validation passed!" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should there be an |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# Unit Test for Cromwell Call Caching | ||
|
||
## Overview | ||
This workflow is designed to validate Cromwell's call caching functionality through a series of controlled test executions. It verifies that: | ||
- Identical workflow runs properly utilize the cache | ||
- Modified inputs correctly invalidate the cache | ||
- Execution times align with expected caching behavior | ||
|
||
The test consists of three sequential workflow runs: | ||
1. Initial execution to populate the cache | ||
2. Identical execution to verify cache utilization | ||
3. Modified execution to confirm cache invalidation | ||
|
||
## Purpose | ||
This workflow serves as a comprehensive test case for: | ||
- Call caching configuration | ||
- Cache hit detection | ||
- Cache invalidation | ||
- Execution time validation | ||
- Workflow output verification | ||
- Runtime environment consistency | ||
- File system interactions | ||
- Docker container caching | ||
|
||
## Workflow Components | ||
|
||
### Workflow: `CacheTest` | ||
The main workflow demonstrates caching behavior through a simple, deterministic task execution. | ||
|
||
**Inputs:** | ||
- `message`: String - Input message to be written to output | ||
- `sleep_time`: Int - Duration to sleep (defaults to 20 seconds) | ||
|
||
**Outputs:** | ||
- `output_file`: File - Generated output file containing the input message, the sleep time, and a run ID derived from both (no wall-clock timestamp, so the content is deterministic) | ||
|
||
### Tasks | ||
|
||
#### Task: `GenerateTimestamp` | ||
Creates a deterministic output based on input parameters with a controlled execution time. | ||
|
||
**Runtime Requirements:** | ||
- CPU: 1 core | ||
- Memory: 1 GB | ||
- Docker: ubuntu:latest | ||
|
||
## Configuration | ||
|
||
### cromwell.conf | ||
Key configuration elements: | ||
```hocon | ||
call-caching { | ||
enabled = true | ||
invalidate-bad-cache-results = true | ||
} | ||
``` | ||
|
||
### options.json | ||
Testing options: | ||
```json | ||
{ | ||
"workflow_failure_mode": "ContinueWhilePossible", | ||
"write_to_cache": true, | ||
"read_from_cache": true | ||
} | ||
``` | ||
|
||
## GitHub Action Workflow | ||
|
||
The test is automated through a GitHub Action (`test-cromwell-cache.yml`) that: | ||
1. Sets up the Java environment | ||
2. Downloads Cromwell | ||
3. Executes three test runs | ||
4. Validates caching behavior | ||
|
||
### Test Sequence | ||
1. **First Run:** | ||
- Executes with initial inputs | ||
- Measures execution time | ||
- Records workflow ID | ||
|
||
2. **Second Run:** | ||
- Uses identical inputs | ||
- Verifies cache utilization | ||
- Confirms the run takes no more than half as long as the first run | ||
|
||
3. **Third Run:** | ||
- Uses modified inputs | ||
- Verifies cache invalidation | ||
- Confirms the run takes at least half as long as the first run (i.e. the task was actually re-executed) | ||
|
||
### Validation Checks | ||
- Completion of all workflow runs | ||
- Presence of cache hit messages in second run | ||
- Absence of cache hit messages in third run | ||
- Execution time comparisons | ||
- Workflow ID uniqueness | ||
|
||
## Usage | ||
|
||
### Local Testing | ||
```bash | ||
# Execute first run | ||
java -Dconfig.file=cromwell.conf -jar cromwell.jar run cacheTest.wdl -i inputs.json -o options.json | ||
|
||
# Execute second run (should use cache) | ||
java -Dconfig.file=cromwell.conf -jar cromwell.jar run cacheTest.wdl -i inputs.json -o options.json | ||
|
||
# Execute third run (with modified inputs) | ||
java -Dconfig.file=cromwell.conf -jar cromwell.jar run cacheTest.wdl -i modified_inputs.json -o options.json | ||
``` | ||
|
||
### GitHub Actions | ||
The test will automatically run on: | ||
- Pull requests | ||
- Manual workflow dispatch | ||
|
||
## Version | ||
- WDL 1.0 | ||
- Cromwell 86 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 86 -> 87 |
||
- GitHub Actions Runner: ubuntu-latest | ||
|
||
## Additional Notes | ||
- Ensures consistent cache behavior across environments | ||
- Validates both positive and negative cache scenarios | ||
- Provides timing-based validation of cache utilization | ||
- Uses deterministic task outputs for reliable testing | ||
- Includes comprehensive error reporting | ||
- Supports both local and CI/CD testing scenarios |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
version 1.0 | ||
|
||
# Entry-point workflow: forwards its two inputs to a single GenerateTimestamp call and exposes that call's file as the workflow output. | ||
workflow CacheTest { | ||
input { | ||
# Text that the task writes into its output file | ||
String message | ||
# Seconds the task sleeps before writing its output; 20 when not supplied | ||
Int sleep_time = 20 | ||
} | ||
|
||
# The only call in the workflow | ||
call GenerateTimestamp { | ||
input: | ||
input_message = message, | ||
sleep_seconds = sleep_time | ||
} | ||
|
||
output { | ||
# File produced by the GenerateTimestamp call | ||
File output_file = GenerateTimestamp.timestamp_file | ||
} | ||
} | ||
|
||
# Sleeps for sleep_seconds, then writes output.txt containing the message, the sleep time and a "Run ID" built from both. | ||
# Despite the task and output names, no clock value is written: the file content depends only on the two inputs. | ||
task GenerateTimestamp { | ||
input { | ||
# Text echoed into output.txt | ||
String input_message | ||
# Number of seconds passed to `sleep` | ||
Int sleep_seconds | ||
} | ||
|
||
# NOTE(review): input_message is interpolated inside double quotes in the shell command, so a value containing a double quote or $ would presumably change what the shell runs - confirm inputs are trusted | ||
command <<< | ||
sleep ~{sleep_seconds} | ||
|
||
# Use a deterministic identifier based on inputs | ||
echo "Message: ~{input_message}" > output.txt | ||
echo "Sleep time: ~{sleep_seconds}" >> output.txt | ||
echo "Run ID: ~{input_message}-~{sleep_seconds}" >> output.txt | ||
>>> | ||
|
||
output { | ||
# The file written by the command above | ||
File timestamp_file = "output.txt" | ||
} | ||
|
||
# NOTE(review): ubuntu:latest is a floating tag; confirm how it interacts with call caching under the docker settings in cromwell.conf | ||
runtime { | ||
docker: "ubuntu:latest" | ||
cpu: 1 | ||
memory: "1 GB" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
include required(classpath("application")) | ||
|
||
# Single config-based backend that runs each job on the local machine, directly or inside a Docker container. | ||
backend { | ||
default = "LocalExample" | ||
providers { | ||
LocalExample { | ||
actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | ||
config { | ||
# Pattern applied to the submit command's output to extract a job ID; the submit scripts below echo a dummy "1" | ||
job-id-regex = "(\\d+)" | ||
|
||
# Optional runtime attributes made available to the submit templates | ||
# NOTE(review): memory_mb is presumably populated from the task's `memory` attribute - confirm | ||
runtime-attributes = """ | ||
Int? cpu | ||
Int? memory_mb | ||
String? docker | ||
""" | ||
|
||
# Used when the task declares no docker attribute: runs the job script in the foreground | ||
submit = """ | ||
echo "1" # Echo a dummy job ID | ||
${job_shell} ${script} | ||
""" | ||
|
||
# Used when the task declares a docker attribute: mounts the call directory into the container and runs the script there | ||
submit-docker = """ | ||
echo "1" # Echo a dummy job ID | ||
docker run \ | ||
--rm \ | ||
-v ${cwd}:${docker_cwd} \ | ||
-w ${docker_cwd} \ | ||
${docker} \ | ||
/bin/bash ${docker_script} | ||
""" | ||
|
||
# File system settings | ||
# The key must be the plural `filesystems`; a singular `filesystem` stanza is not read by the backend | ||
filesystems { | ||
local { | ||
# Input localization strategies, tried in this order | ||
localization: [ | ||
"hard-link", "soft-link", "copy" | ||
] | ||
} | ||
} | ||
|
||
# Docker configuration | ||
# NOTE(review): Cromwell's reference config defines docker.hash-lookup at the top level; confirm this nested setting is honored, and that disabling lookup is intended for a task using a floating image tag | ||
docker { | ||
hash-lookup { | ||
enabled = false | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
# Call caching switches; the GitHub Action's second and third runs depend on caching being enabled here. | ||
call-caching { | ||
enabled = true | ||
# NOTE(review): presumably marks a cache entry invalid when reusing its results fails - confirm against the Cromwell docs | ||
invalidate-bad-cache-results = true | ||
} | ||
|
||
# File-backed HSQLDB (files named cromwell-cache-db in the working directory), so data written by one `cromwell run` invocation is still on disk for the next. | ||
database { | ||
profile = "slick.jdbc.HsqldbProfile$" | ||
db { | ||
driver = "org.hsqldb.jdbcDriver" | ||
# `file:` selects on-disk storage rather than in-memory | ||
url = "jdbc:hsqldb:file:cromwell-cache-db;shutdown=false;hsqldb.tx=mvcc" | ||
# NOTE(review): presumably milliseconds - confirm | ||
connectionTimeout = 120000 | ||
numThreads = 1 | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"CacheTest.message": "This WDL is intended to test caching functionality, we'll see how it works..." | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"workflow_failure_mode": "ContinueWhilePossible", | ||
"write_to_cache": true, | ||
"read_from_cache": true | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use cromwell-87 here and in the other places in this file, per the Slack discussion about using the same version that proof is using.