Skip to content

Crawl episodes

Crawl episodes #7922

Workflow file for this run

name: Crawl episodes
on:
issue_comment:
types: [created] # for convenient test
schedule:
- cron: '*/30 * * * *'
jobs:
crawl_episode:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
include:
- type: 6mins
script: six_mins_english_crawler.py
- type: tfts
script: think_fast_talk_smart_crawler.py
- type: sciam
script: scientific_american_crawler.py
- type: lifekit
script: npr_life_kit_crawler.py
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.7'
- name: Install FFmpeg and espeak
run: sudo apt-get update && sudo apt-get install -y ffmpeg espeak libespeak-dev
- name: Cache pip
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: |
python -m venv venv
source venv/bin/activate
pip install -r scripts/requirements.txt
pip install -e git+https://github.com/anig1scur/aeneas.git@master#egg=aeneas
- name: Crawl latest episode data
run: |
source venv/bin/activate
python scripts/${{ matrix.script }}
- name: Download audio and extract wave peaks and fragments
env:
TYPE: ${{ matrix.type }}
run: |
source venv/bin/activate
python scripts/download_audio_and_extract_wave_peaks_and_fragments.py
- name: Pull latest changes
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add .
# Check if there are any changes to commit
if git diff --staged --quiet; then
echo "No changes to commit"
else
git commit -m "Update episode data of ${{ matrix.type }}"
git pull origin ${{ github.ref }} --rebase
git push origin ${{ github.ref }}
gh workflow run deploy.yml
fi
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}