Skip to content

FaunaDB scrapping #8375

FaunaDB scrapping

FaunaDB scrapping #8375

# GitHub Actions workflow: scraping job run on push and on a cron schedule
# Display name of the workflow in the Actions tab.
name: FaunaDB scrapping

# Triggers: pushes to `main` or any `feature/*` branch (changes under the
# listed workflow paths are ignored), plus a recurring schedule.
on:
  push:
    branches:
      - 'main'
      - 'feature/*'
    paths-ignore:
      - '.github/workflows/*.yml'
  schedule:
    # `*` is a YAML indicator character, so the cron expression must be quoted.
    # Fires at minute 10 of hours 1, 3, ..., 23 on weekdays 1-5 (Mon-Fri).
    - cron: '10 1-23/2 * * 1-5'

# Repository secrets exported as environment variables for the whole workflow.
env:
  FAUNA_SECRET: ${{ secrets.FAUNA_AI_ACCESS_TOKEN }}
  DISCORD_WEBHOOK: ${{ secrets.DISCORD_DAILY_REVIEW_WEBHOOK }}
  DISCORD_CRITICAL_WEBHOOK: ${{ secrets.DISCORD_CRITICAL_WEBHOOK }}
# Add a cron job to ensure external webpages and APIs have not changed
# Single job: install dependencies, run the crawler, then commit whatever the
# crawler changed in the working tree and push it back to the triggering ref.
jobs:
  scrap_data:
    runs-on: ubuntu-latest
    name: get_data
    # The final step pushes with GITHUB_TOKEN, which needs write access to the
    # repository contents; the default token can be read-only.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Version range or exact version, using SemVer range syntax.
          # Quoted so YAML keeps it a string instead of parsing a float.
          python-version: '3.9'
          architecture: 'x64'
      - name: Install Dependencies
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m spacy download en_core_web_sm
      - name: Crawl
        run: |
          python3 recommend_news.py
      - name: Commit files
        run: |
          if [[ -z "$(git status -s)" ]]
          then
            echo "tree is clean"
          else
            echo "tree is dirty, committing changes"
            git config --local user.email "action@github.com"
            git config --local user.name "GitHub Action"
            git config pull.rebase false || true
            # Stage everything in the working tree, including new files.
            git add .
            git commit -m "Add changes" -a
            # Best-effort sync with main that favours the local commit;
            # failures are tolerated so the push step can still be attempted.
            git merge -Xours main || true
            git pull origin main --strategy=ours || true
            git pull origin main || true
            # Tell the later "Push changes" step that there is something to push.
            echo "PUSH_FILES=true" >> "$GITHUB_ENV"
          fi
      - name: Test
        # Prints the flag set by the previous step (empty when tree was clean).
        run: echo "${{ env.PUSH_FILES }}"
      - name: Push changes
        if: ${{ env.PUSH_FILES == 'true' }}
        # NOTE(review): `@master` is a mutable ref; consider pinning this
        # third-party action to a release tag or commit SHA.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}