From 068780f5def03ab65f0d775c45dc2bb1fc8d4960 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 10:16:46 +0800 Subject: [PATCH 1/6] chore: hide sequence table --- data/metadata.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data/metadata.json b/data/metadata.json index f2464fa..c1ecf4f 100644 --- a/data/metadata.json +++ b/data/metadata.json @@ -81,6 +81,9 @@ "effective_date" ], "description": "Visa requirements between countries" + }, + "sqlite_sequence": { + "hidden": true } } } From 16e4d7282b610848803c09293b17bfa43cdac9c3 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 10:26:18 +0800 Subject: [PATCH 2/6] refactor: update VisaRequirement to use natural key --- data/metadata.json | 4 ---- scrape.py | 39 ++++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/data/metadata.json b/data/metadata.json index c1ecf4f..38ce203 100644 --- a/data/metadata.json +++ b/data/metadata.json @@ -70,7 +70,6 @@ }, "VisaRequirement": { "sortable_columns": [ - "id", "from_country", "to_country", "effective_date", @@ -81,9 +80,6 @@ "effective_date" ], "description": "Visa requirements between countries" - }, - "sqlite_sequence": { - "hidden": true } } } diff --git a/scrape.py b/scrape.py index d476942..10ae382 100644 --- a/scrape.py +++ b/scrape.py @@ -35,11 +35,11 @@ def create_database(): cursor.execute(""" CREATE TABLE IF NOT EXISTS VisaRequirement ( - id INTEGER PRIMARY KEY AUTOINCREMENT, from_country TEXT, to_country TEXT, effective_date DATE NOT NULL, requirement_type TEXT, + PRIMARY KEY (from_country, to_country, effective_date), FOREIGN KEY (from_country) REFERENCES Country(code), FOREIGN KEY (to_country) REFERENCES Country(code) ) @@ -206,26 +206,27 @@ def insert_visa_requirements(from_country_code, visa_data): for to_country in countries: cursor.execute( """ - INSERT INTO VisaRequirement (from_country, to_country, effective_date, requirement_type) - SELECT ?, ?, ?, ? - WHERE NOT EXISTS ( - SELECT 1 FROM VisaRequirement - WHERE from_country = ? AND to_country = ? AND requirement_type = ? - ORDER BY effective_date DESC - LIMIT 1 - ) + SELECT requirement_type + FROM VisaRequirement + WHERE from_country = ? AND to_country = ? + ORDER BY effective_date DESC + LIMIT 1 """, - ( - from_country_code, - to_country["code"], - current_date, - req_type, - from_country_code, - to_country["code"], - req_type, - ), + (from_country_code, to_country["code"]), ) - if cursor.rowcount > 0: + + result = cursor.fetchone() + + if result is None or result[0] != req_type: + # NOTE: insert only if there's no previous record or the requirement type has changed + cursor.execute( + """ + INSERT INTO VisaRequirement (from_country, to_country, effective_date, requirement_type) + VALUES (?, ?, ?, ?) + """, + (from_country_code, to_country["code"], current_date, req_type), + ) + insert_count += 1 print( f"Inserted new requirement: {from_country_code} to {to_country['code']} ({req_type})" From a0be97efd0bb86cc3a9c992c56a6a1721f7ebe36 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 10:47:11 +0800 Subject: [PATCH 3/6] chore: update metadata and enhance visa requirement queries with new trends and summaries --- data/metadata.json | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/data/metadata.json b/data/metadata.json index 38ce203..9d2d73e 100644 --- a/data/metadata.json +++ b/data/metadata.json @@ -2,7 +2,7 @@ "title": "Passport Index Database", "description": "Tracking historical changes in passport rankings and visa requirements.", "license": "MIT", - "license_url": "https://opensource.org/licenses/MIT", + "license_url": "https://github.com/ngshiheng/passportindexdb/blob/main/LICENSE", "source": "Henley Passport Index", "source_url": "https://www.henleyglobal.com/passport-index", "databases": { @@ -13,35 +13,30 @@ "title": "Top 10 Ranked Passports", "description": "Shows the top 10 ranked passports based on the most recent year's data" }, - "visa_free_travel_trends": { - "sql": "SELECT c.name AS country, cr.year, cr.visa_free_count FROM Country c JOIN CountryRanking cr ON c.code = cr.country_code WHERE c.code IN (SELECT country_code FROM CountryRanking GROUP BY country_code HAVING COUNT(DISTINCT year) = (SELECT COUNT(DISTINCT year) FROM CountryRanking)) ORDER BY c.name, cr.year", - "title": "Visa-Free Travel Trends", - "description": "Displays the trend of visa-free travel count for countries over the years" + "passport_strength_trends": { + "sql": "WITH recent_years AS (SELECT DISTINCT year FROM CountryRanking WHERE year >= (SELECT MAX(year) - 4 FROM CountryRanking)) SELECT c.name AS country, GROUP_CONCAT(cr.year || ':' || cr.visa_free_count, ', ') AS 'Visa-Free Count Trend (Year:Count)' FROM Country c JOIN CountryRanking cr ON c.code = cr.country_code JOIN recent_years ry ON cr.year = ry.year GROUP BY c.code ORDER BY MAX(cr.visa_free_count) DESC LIMIT 20", + "title": "Passport Strength Trends", + "description": "Displays the trend of visa-free travel count for top 20 countries over the last 5 years" }, "most_improved_passports": { "sql": "WITH RankChanges AS (SELECT cr1.country_code, c.name AS country, cr1.year AS current_year, cr2.year AS previous_year, cr1.rank AS current_rank, cr2.rank AS previous_rank, cr2.rank - cr1.rank AS rank_improvement FROM CountryRanking cr1 JOIN CountryRanking cr2 ON cr1.country_code = cr2.country_code AND cr1.year = cr2.year + 1 JOIN Country c ON cr1.country_code = c.code WHERE cr1.year = ( SELECT MAX(year) FROM CountryRanking)) SELECT country, current_year, previous_year, current_rank, previous_rank, rank_improvement FROM RankChanges ORDER BY rank_improvement DESC LIMIT 10", "title": "Most Improved Passports", "description": "Lists the top 10 countries with the most improved passport rankings from the previous year to the current year" }, - "visa_requirements_summary": { - "sql": "SELECT requirement_type, COUNT(*) AS count FROM VisaRequirement WHERE effective_date = (SELECT MAX(effective_date) FROM VisaRequirement) GROUP BY requirement_type ORDER BY count DESC", - "title": "Visa Requirements Summary", - "description": "Provides a summary of different types of visa requirements and their frequencies" - }, - "countries_by_region": { - "sql": "SELECT region, COUNT(*) AS country_count FROM Country WHERE region IS NOT NULL GROUP BY region ORDER BY country_count DESC", - "title": "Countries by Region", - "description": "Shows the distribution of countries across different regions" - }, "regional_passport_strength": { - "sql": "SELECT c.region, AVG(cr.visa_free_count) AS avg_visa_free_count FROM Country c JOIN CountryRanking cr ON c.code = cr.country_code WHERE cr.year = (SELECT MAX(year) FROM CountryRanking) GROUP BY c.region ORDER BY avg_visa_free_count DESC", + "sql": "WITH recent_years AS (SELECT year FROM CountryRanking WHERE year >= (SELECT MAX(year) - 4 FROM CountryRanking) GROUP BY year), regional_yearly_avg AS (SELECT c.region, cr.year, ROUND(AVG(cr.visa_free_count), 0) AS avg_visa_free_count FROM CountryRanking cr JOIN Country c ON cr.country_code = c.code JOIN recent_years ry ON cr.year = ry.year GROUP BY c.region, cr.year) SELECT region, ROUND(AVG(avg_visa_free_count), 0) AS overall_avg_visa_free_count, GROUP_CONCAT(year || ':' || avg_visa_free_count, ', ') AS 'Yearly Avg (Year:Count)' FROM regional_yearly_avg GROUP BY region ORDER BY overall_avg_visa_free_count DESC", "title": "Regional Passport Strength", - "description": "Compares average visa-free counts by region for the most recent year" + "description": "Compares average visa-free counts by region for the last 5 years, showing overall average and yearly trends" + }, + "visa_requirement_trends": { + "sql": "WITH yearly_requirements AS (SELECT strftime('%Y', effective_date) AS year, requirement_type, COUNT(*) AS count FROM VisaRequirement GROUP BY year, requirement_type), total_yearly AS (SELECT year, SUM(count) AS total FROM yearly_requirements GROUP BY year), visa_free_yearly AS (SELECT year, COALESCE(SUM(CASE WHEN requirement_type = 'visa free' THEN count ELSE 0 END), 0) AS visa_free_count FROM yearly_requirements GROUP BY year) SELECT ty.year, vy.visa_free_count, ty.total, ROUND(CAST(vy.visa_free_count AS FLOAT) / ty.total * 100, 2) AS visa_free_percentage FROM total_yearly ty JOIN visa_free_yearly vy ON ty.year = vy.year ORDER BY ty.year", + "title": "Visa Requirement Trends", + "description": "Analyzes the trend of visa policies over the years, showing the count and percentage of visa-free requirements" }, - "visa_requirement_changes": { - "sql": "SELECT vr.requirement_type, COUNT(*) AS count, strftime('%Y', vr.effective_date) AS year FROM VisaRequirement vr GROUP BY vr.requirement_type, year ORDER BY year, requirement_type", - "title": "Visa Requirement Changes Over Time", - "description": "Analyzes how visa requirements have changed over the years" + "visa_requirement_reciprocity": { + "sql": "WITH LatestRequirements AS (SELECT from_country, to_country, requirement_type, ROW_NUMBER() OVER (PARTITION BY from_country, to_country ORDER BY effective_date DESC) as rn FROM VisaRequirement), NonReciprocal AS (SELECT lr1.from_country, lr1.to_country FROM LatestRequirements lr1 JOIN LatestRequirements lr2 ON lr1.from_country = lr2.to_country AND lr1.to_country = lr2.from_country WHERE lr1.requirement_type != lr2.requirement_type AND lr1.rn = 1 AND lr2.rn = 1) SELECT c.name AS Country, COUNT(*) AS NonReciprocal_Count, GROUP_CONCAT(c2.name, ', ') AS NonReciprocal_With FROM NonReciprocal nr JOIN Country c ON nr.from_country = c.code JOIN Country c2 ON nr.to_country = c2.code GROUP BY nr.from_country ORDER BY NonReciprocal_Count DESC LIMIT 20", + "title": "Visa Requirement Reciprocity Summary", + "description": "Summarizes non-reciprocal visa requirements by country, showing the count and list of countries with non-reciprocal requirements. Non-reciprocal visa relationships refer to situations where two countries have different visa requirements for each other's citizens." } }, "tables": { From 2448d16691143c526b0706011ee0157d6841cbb9 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 10:50:22 +0800 Subject: [PATCH 4/6] ci: init --- .github/workflows/scrape.yml | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 .github/workflows/scrape.yml diff --git a/.github/workflows/scrape.yml b/.github/workflows/scrape.yml new file mode 100644 index 0000000..3ae9daf --- /dev/null +++ b/.github/workflows/scrape.yml @@ -0,0 +1,67 @@ +name: Scrape latest data + +on: + push: + branches: + - "main" + paths: + - "**.py" + - ".github/workflows/**" + workflow_dispatch: # This allows us to trigger manually from the GitHub Actions UI + schedule: + - cron: "0 0 1,15 * *" # Scheduled at 00:00 on day-of-month 1 and 15 + +jobs: + scheduled: + runs-on: ubuntu-latest + steps: + - name: Check out this repo + uses: actions/checkout@v4 + + - name: Set up python + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + # Step to get the latest artifact run ID + # Fetch the latest artifact run ID using GitHub API and jq + # Save the run ID as an environment variable + # If your repository is set to private, an OAuth app token or personal access token (classic) with repo scope is required + - name: Get latest artifact run id + run: | + ARTIFACT_RUN_ID=$(curl -s "https://api.github.com/repos/${{ github.repository }}/actions/artifacts?per_page=1" | jq '.artifacts[0].workflow_run.id') + echo "artifact_run_id=$ARTIFACT_RUN_ID" >> $GITHUB_ENV + + # Download the artifact (our SQLite DB!) from the last run + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: passport-index-db + path: ./data/ + run-id: ${{ env.artifact_run_id }} # Run ID of the artifact (SQLite DB) uploaded from the last run + github-token: ${{ secrets.GH_PAT }} # REQUIRED. See https://github.com/actions/download-artifact?tab=readme-ov-file#download-artifacts-from-other-workflow-runs-or-repositories + continue-on-error: false + + - name: Display downloaded file + run: ls data/ + + - name: Run scrape.py + run: python3 scrape.py + + - name: Upload updated artifact + uses: actions/upload-artifact@v4 + with: + name: passport-index-db # Name of the artifact to upload, make sure to match the name in the download step + path: ./data/passportindex.db + if-no-files-found: error + + - name: Install datasette + run: | + pipx install datasette + + - name: Deploy to vercel + env: + VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} + run: |- + datasette install datasette-publish-vercel + datasette publish vercel data/passportindex.db --project=passportindexdb --install=datasette-hashed-urls --install=datasette-cluster-map --install=datasette-block-robots --token="$VERCEL_TOKEN" --metadata data/metadata.json --setting allow_download off --setting allow_csv_stream off --extra-options "-i" From 0e478aa842d13587ed5f6053b63e1e29c2708c80 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 11:26:08 +0800 Subject: [PATCH 5/6] docs: update diagram --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 27badda..b730b44 100644 --- a/README.md +++ b/README.md @@ -6,28 +6,28 @@ Tracking historical changes in passport rankings and visa requirements. ```mermaid erDiagram + Country ||--o{ CountryRanking : has + Country ||--o{ VisaRequirement : "issues/receives" + Country { - TEXT code PK - TEXT name - TEXT region + text code PK + text name + text region } + CountryRanking { - TEXT country_code FK - INTEGER year - INTEGER rank - INTEGER visa_free_count + text country_code PK, FK + int year PK + int rank + int visa_free_count } + VisaRequirement { - INTEGER id PK - TEXT from_country FK - TEXT to_country FK - DATE effective_date - TEXT requirement_type + text from_country PK, FK + text to_country PK, FK + date effective_date PK + text requirement_type } - - Country ||--o{ CountryRanking : has - Country ||--o{ VisaRequirement : requires - Country ||--o{ VisaRequirement : allows ``` ## Usage From 9f0437696776d275c1a21903a0bf33315dd64f37 Mon Sep 17 00:00:00 2001 From: Jerry Ng Date: Sun, 8 Dec 2024 11:27:44 +0800 Subject: [PATCH 6/6] docs: add diagram about how this works --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index b730b44..387d17b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,36 @@ Tracking historical changes in passport rankings and visa requirements. +## How This Works + +```mermaid +graph TB + subgraph Vercel + deployment[Datasette] + class deployment vercel; + end + + subgraph GitHub + subgraph Actions + scraper[scrape.py] + end + subgraph Artifacts + db[(passportindex.db)] + class db artifacts; + end + end + + subgraph Henley Passport Index + api[API] + end + + db --> |1: Download| scraper + api --> |2: Fetch Data| scraper + scraper --> |3: Upload| db + scraper --> |4: Publish| deployment + deployment --> |5: View/Access Data| client[User] +``` + ## Database Schema ```mermaid