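# NOTE(review): the next three lines look like file-viewer page residue rather
# than workflow content; kept verbatim as comments so they do not break parsing.
# Update Data #1410
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters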
name: Update Data

# Triggers: manual dispatch, every push, and a fixed schedule.
on:
  workflow_dispatch:
  push:
  schedule:
    # https://crontab.guru/#17_0,6,12,18_*_*_*
    # Cron is evaluated in UTC: 00:17, 06:17, 12:17, 18:17.
    # Times in IST(+5:30): 5:47, 11:47, 17:47, 23:47
    - cron: '17 0,6,12,18 * * *'
# Runs are grouped per workflow and per PR number (falling back to the git ref
# when the event carries no pull request); a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Builds the dataset, uploads it as the `events-db` artifact, and commits
  # tracked changes under out/ back to the repository.
  update:
    permissions:
      # Needed so the commit step can push to this repository.
      contents: write
    name: Update data
    # We run this on our own infra, to avoid cloud IP egress
    runs-on: self-hosted
    steps:
      # This needs to come before the checkout, since checkout depends on git
      - name: Install Dependencies
        run: |
          sudo apt-get update && sudo apt-get install --yes libnss3 nss-plugin-pem ca-certificates wget jq git build-essential unzip
          # Register the GitHub CLI apt repository, then install `gh`.
          sudo mkdir -p -m 755 /etc/apt/keyrings \
            && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
            && sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
            && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
            && sudo apt-get update \
            && sudo apt-get install --yes gh
          # Download a pinned curl-impersonate release and verify its SHA-256
          # before unpacking it into /usr/bin.
          wget "https://github.com/lwthiker/curl-impersonate/releases/download/v0.6.1/curl-impersonate-v0.6.1.x86_64-linux-gnu.tar.gz" -O /tmp/curl-impersonate.tar.gz
          echo "fa1e1614f7ba69ccc66721a0f38be457a3647eb64c75d66974b56186e3316b12  /tmp/curl-impersonate.tar.gz" | sha256sum --check --status
          sudo tar -xzf /tmp/curl-impersonate.tar.gz -C /usr/bin

      # The key embeds the run id, so each run saves a fresh cache entry;
      # restore-keys matches any earlier entry sharing the "cache-requests-" prefix.
      - name: Cache Requests SQLite
        uses: actions/cache@v4
        with:
          path: /home/runner/.cache/event-fetcher-cache.sqlite
          key: "cache-requests-${{ github.run_id }}"
          restore-keys: "cache-requests-"

      - name: Clone self repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Setup
        run: |
          git config --global init.defaultBranch main
          git config --global extensions.partialClone true
          pip install .

      - name: Fetch updates
        run: make clean && make -s all
        env:
          ZOMATO_PUBLIC_API_KEY: ${{ secrets.ZOMATO_PUBLIC_API_KEY }}

      # The `upload` id lets the commit step reference the artifact URL.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        id: upload
        with:
          name: events-db
          path: |
            events.db
            out/pvr-*
          if-no-files-found: error
          retention-days: 1

      # Commit back some data so we can keep track of diffs
      - name: Commit
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          # A blank line separates the commit subject from the body.
          commit_message: |
            Automatic Updates 🤖

            Database URL: ${{ steps.upload.outputs.artifact-url }}
            Run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            Workflow: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/workflow
          commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
          add_options: "--ignore-removal --update"
          file_pattern: "out/*"
          status_options: '--untracked-files=no'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  # Publishes the artifact produced by `update` as a release on
  # blr-today/dataset. Only runs for schedule-triggered runs.
  release:
    needs: update
    if: ${{ github.event_name == 'schedule' }}
    runs-on: ubuntu-latest
    name: Release
    steps:
      - name: fetch from artifact
        uses: actions/download-artifact@v4
        with:
          name: events-db
          path: .

      # App token restricted to the `dataset` repository; used by `gh` below.
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ vars.PUBLISH_APP_ID }}
          private-key: ${{ secrets.PUBLISH_PRIVATE_KEY }}
          repositories: dataset

      - name: publish dataset
        env:
          GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
        run: |
          # Tag format: v<year>.<month>.<day>-<hour>, month/day without zero padding.
          export VERSION=$(date '+%Y.%-m.%-d-%H')
          gh release create "v$VERSION" events.db out/*.csv --repo=blr-today/dataset --notes "blr.today dataset release. See LICENSE.txt for license information before using this dataset."

  # Triggers a Netlify build after every successful `update` run.
  publish:
    needs: update
    runs-on: ubuntu-latest
    name: Publish Website
    steps:
      - name: trigger build on netlify
        # --fail makes curl exit non-zero on an HTTP error response, so a
        # rejected build hook fails this step instead of passing silently.
        run: curl --fail -X POST -d '{}' "https://api.netlify.com/build_hooks/${BUILD_HOOK_SECRET}"
        env:
          BUILD_HOOK_SECRET: ${{ secrets.NETLIFY_BUILD_HOOK_SECRET }}