Skip to content

Scrape latest data

Scrape latest data #9

Workflow file for this run

name: Scrape latest data
on:
push:
branches:
- "main"
paths:
- "**.py"
- ".github/workflows/**"
workflow_dispatch: # This allows us to trigger manually from the GitHub Actions UI
schedule:
- cron: "0 0 1,15 * *" # Scheduled at 00:00 on day-of-month 1 and 15
jobs:
scheduled:
runs-on: ubuntu-latest
steps:
- name: Check out this repo
uses: actions/checkout@v4
- name: Set up python
uses: actions/setup-python@v5
with:
python-version: 3.12
# Step to get the latest artifact run ID
# Fetch the latest artifact run ID using GitHub API and jq
# Save the run ID as an environment variable
# If your repository is set to private, an OAuth app token or personal access token (classic) with repo scope is required
- name: Get latest artifact run id
run: |
ARTIFACT_RUN_ID=$(curl -s "https://api.github.com/repos/${{ github.repository }}/actions/artifacts?per_page=1" | jq '.artifacts[0].workflow_run.id')
echo "artifact_run_id=$ARTIFACT_RUN_ID" >> $GITHUB_ENV
# Download the artifact (our SQLite DB!) from the last run
- name: Download artifact
uses: actions/download-artifact@v4
with:
name: passport-index-db
path: ./data/
run-id: ${{ env.artifact_run_id }} # Run ID of the artifact (SQLite DB) uploaded from the last run
github-token: ${{ secrets.GH_PAT }} # REQUIRED. See https://github.com/actions/download-artifact?tab=readme-ov-file#download-artifacts-from-other-workflow-runs-or-repositories
continue-on-error: false
- name: Display downloaded file
run: ls data/
- name: Run scrape.py
run: python3 scrape.py
- name: Upload updated artifact
uses: actions/upload-artifact@v4
with:
name: passport-index-db # Name of the artifact to upload, make sure to match the name in the download step
path: ./data/passportindex.db
if-no-files-found: error
- name: Install datasette
run: |
pipx install datasette
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ngshiheng
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build Docker image
run: make docker-build
- name: Push Docker image
run: make docker-push