---
name: calculate-times
run-name: calculate-times-${{ inputs.mode }}
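# Triggered manually via workflow_dispatch. An illustrative dispatch from the
# GitHub CLI (values are examples only):
#   gh workflow run calculate-times -f mode=bicycle -f override_states=01,06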
on:
  workflow_dispatch:
    inputs:
      mode:
        required: true
        description: Mode of travel
        default: 'car'
        type: choice
        options:
          - car
          - bicycle
          - foot
      override_years:
        required: false
        description: |
          Comma-separated list of OSM data years to run, e.g. 2020,2023.
          Will run all (see params.yaml) if null
        type: string
      override_states:
        required: false
        description: |
          Comma-separated state codes to run, e.g. 01,06.
          Will run all (see params.yaml) if null
        type: string
      override_geographies:
        required: false
        description: |
          Comma-separated geographies to limit run, e.g. county,tract.
          Will run all (see params.yaml) if null
        type: string
env:
  AWS_DEFAULT_REGION: us-east-1
  # See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
  AWS_EC2_METADATA_DISABLED: true
  PYTHONUNBUFFERED: "1"
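# setup-jobs converts the year/state inputs (or the defaults in params.yaml)
# into JSON arrays; run-job fans out over every combination of them.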
jobs:
  setup-jobs:
    runs-on: ubuntu-24.04
    outputs:
      years: ${{ steps.create-year-jobs.outputs.param }}
      states: ${{ steps.create-state-jobs.outputs.param }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Create year jobs
        id: create-year-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.year'
          param_override: '${{ inputs.override_years }}'
      - name: Create state jobs
        id: create-state-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.state'
          param_override: '${{ inputs.override_states }}'
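  # One run-job instance runs per year x state pair from the matrix below;
  # each fetches or builds the OSRM network for that state, then computes
  # travel times for the requested geographies.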
  run-job:
    runs-on: ubuntu-24.04
    needs: setup-jobs
    strategy:
      # Don't fail all chunks if one fails
      fail-fast: false
      matrix:
        year: ${{ fromJSON(needs.setup-jobs.outputs.years) }}
        state: ${{ fromJSON(needs.setup-jobs.outputs.states) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}
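      # Building the OSRM network can exhaust the default runner disk and
      # memory, so free space and enlarge the swapfile before the heavy steps.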
      - name: Remove unnecessary software and increase swap space
        uses: ./.github/actions/prep-disk-and-swap
        with:
          # Increase swapfile size to 10 GB
          swap_override: 10737418240
      - name: Install DVC
        uses: ./.github/actions/setup-dvc
      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations
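      # Pull only this matrix job's state-level OSM extract from the DVC
      # remote instead of the full dataset.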
      - name: Fetch OSM extract data
        shell: bash
        working-directory: 'data'
        run: |
          path=year=${{ matrix.year }}/geography=state/state=${{ matrix.state }}/${{ matrix.state }}
          uv run dvc pull --no-run-cache \
            ./intermediate/osmextract/"$path".osm.pbf
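      # Check the R2 object store for a prebuilt OSRM network for this
      # mode/year/state; on a hit, extract it and set fetched=true so the
      # build step below is skipped.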
      - name: Fetch OSRM network data
        id: fetch-osrm-data
        shell: bash
        working-directory: 'data'
        run: |
          path=s3://opentimes-resources/cache/osrmnetwork/mode=${{ inputs.mode }}/year=${{ matrix.year }}/geography=state/state=${{ matrix.state }}/osrmnetwork.tar.zst
          if aws s3 ls "$path" --endpoint-url https://${{ vars.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com --profile cloudflare > /dev/null 2>&1; then
            aws s3 cp --quiet --endpoint-url \
              https://${{ vars.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com \
              "$path" ./osrmnetwork.tar.zst --profile cloudflare
            tar -xf ./osrmnetwork.tar.zst .
            echo "OSRM data fetched from cache, skipping network build step"
            echo "fetched=true" >> "$GITHUB_OUTPUT"
          else
            echo "OSRM not in cache, running network build step"
            echo "fetched=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-suffix: "site-data"
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install Python dependencies
        id: install-python-dependencies
        shell: bash
        run: |
          uv python install
          uv venv
          uv pip install ".[site,data]"
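      # Cache miss: build the OSRM network from the OSM extract, then
      # compress it and upload the tarball back to the R2 cache.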
      - name: Create OSRM network files
        if: steps.fetch-osrm-data.outputs.fetched != 'true'
        shell: bash
        working-directory: 'data'
        run: |
          echo ${{ steps.fetch-osrm-data.outputs.fetched }}
          ./src/create_osrmnetwork.sh ${{ inputs.mode }} \
            ${{ matrix.year }} ${{ matrix.state }}
          path=s3://opentimes-resources/cache/osrmnetwork/mode=${{ inputs.mode }}/year=${{ matrix.year }}/geography=state/state=${{ matrix.state }}/osrmnetwork.tar.zst
          ZSTD_CLEVEL=19 sudo tar --zstd -cf ./osrmnetwork.tar.zst ./build
          aws s3 cp --quiet --endpoint-url \
            https://${{ vars.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com \
            ./osrmnetwork.tar.zst "$path" --profile cloudflare
          rm -f ./osrmnetwork.tar.zst
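      # Parse the geography override (or the full list from params.yaml) into
      # a JSON array consumed by the routing loop below.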
      - name: Create routing jobs per geography
        id: create-geo-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.census.geography.all'
          param_override: '${{ inputs.override_geographies }}'
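      # osrm-routed listens on container port 5000, published on host port
      # 5333 (calculate_times.py is assumed to query it there); times are then
      # computed once per geography for this mode/year/state.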
      - name: Run routing jobs
        shell: bash
        working-directory: 'data'
        run: |
          # Start the Docker backend before running jobs
          docker run --rm -d -p 5333:5000 -v "./build:/data" \
            osrm/osrm-backend osrm-routed --algorithm ch \
            --max-table-size 100000000 /data/${{ matrix.state }}.osrm

          geographies='${{ steps.create-geo-jobs.outputs.param }}'
          geographies_array=($(echo "$geographies" | jq -r '.[]'))
          for geo in "${geographies_array[@]}"; do
            echo "Starting job with parameters: mode=${{ inputs.mode }}, year=${{ matrix.year }}, geography=${geo}, state=${{ matrix.state }}"
            uv run ./src/calculate_times.py \
              --mode ${{ inputs.mode }} --year ${{ matrix.year }} \
              --geography "$geo" --state ${{ matrix.state }} \
              --centroid-type weighted \
              --write-to-s3
          done