From 9ff607090032702b1421993adf4d56713471bbbc Mon Sep 17 00:00:00 2001 From: Youssouf EL Azizi Date: Sat, 3 Feb 2024 23:26:01 +0100 Subject: [PATCH] feat: add workflows scripts --- .github/workflows/refetch-articles.yml | 27 +++++++++++++++ .../workflows/refetch-podcast-episodes.yml | 26 +++++++++++++++ .../workflows/update-articles-metadata.yml | 27 +++++++++++++++ .github/workflows/update-repos-metadata.yml | 27 +++++++++++++++ package.json | 6 +++- .../{rss-refresher.ts => refetch-articles.ts} | 33 ++++++++++--------- ...etcher.ts => refetch-podcasts-episodes.ts} | 33 ++++++++++++++----- src/scripts/update-articles-metadata.ts | 2 +- src/scripts/utils.ts | 3 ++ 9 files changed, 158 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/refetch-articles.yml create mode 100644 .github/workflows/refetch-podcast-episodes.yml create mode 100644 .github/workflows/update-articles-metadata.yml create mode 100644 .github/workflows/update-repos-metadata.yml rename src/scripts/{rss-refresher.ts => refetch-articles.ts} (89%) rename src/scripts/{podcast-fetcher.ts => refetch-podcasts-episodes.ts} (83%) diff --git a/.github/workflows/refetch-articles.yml b/.github/workflows/refetch-articles.yml new file mode 100644 index 0000000..7ff3d7f --- /dev/null +++ b/.github/workflows/refetch-articles.yml @@ -0,0 +1,27 @@ +# ✍️ Description: +# This action will check the latest rss feed fetched and check if there are any new posts +# Should be run on a schedule, e.g. 
every day at midnight (UTC) +name: Refetch article + +on: + schedule: + - cron: "0 0 * * *" + workflow_dispatch: + +jobs: + refetch-article: + name: Refetch article + runs-on: ubuntu-latest + timeout-minutes: 4 + + steps: + - name: 📦 Checkout project repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: 📦 Setupbun + install deps + uses: ./.github/actions/setup-bun-and-install + + - name: 🏃‍♂️ Refetch article from rss + run: bun run refetch-articles diff --git a/.github/workflows/refetch-podcast-episodes.yml b/.github/workflows/refetch-podcast-episodes.yml new file mode 100644 index 0000000..1a75079 --- /dev/null +++ b/.github/workflows/refetch-podcast-episodes.yml @@ -0,0 +1,26 @@ +# ✍️ Description: +# This action will check spotify API and check for the last podcasts episodes and add them to the database +# Should be run on a schedule, e.g. every day at midnight (UTC) +name: Refetch podcasts episodes (Spotify) + +on: + schedule: + - cron: "0 0 * * *" + workflow_dispatch: +jobs: + refetch-podcasts-episodes: + name: Refetch podcasts episodes + runs-on: ubuntu-latest + timeout-minutes: 4 + + steps: + - name: 📦 Checkout project repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: 📦 Setupbun + install deps + uses: ./.github/actions/setup-bun-and-install + + - name: 🏃‍♂️ Refetch podcasts episodes + run: bun run refetch-podcasts-episodes diff --git a/.github/workflows/update-articles-metadata.yml b/.github/workflows/update-articles-metadata.yml new file mode 100644 index 0000000..b2a40ee --- /dev/null +++ b/.github/workflows/update-articles-metadata.yml @@ -0,0 +1,27 @@ +# ✍️ Description: +# This action is to add missing article metadata for fetched articles. +# Should be run on a schedule, e.g. 
every day at 1am and update the metadata for 10 articles at a time +name: Article Metadata Update + +on: + schedule: + - cron: "0 1 * * *" + workflow_dispatch: + +jobs: + update-articles-metadata: + name: Update Articles metadata + runs-on: ubuntu-latest + timeout-minutes: 3 + + steps: + - name: 📦 Checkout project repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: 📦 Setupbun + install deps + uses: ./.github/actions/setup-bun-and-install + + - name: 🏃‍♂️ Update Articles metadata + run: bun run update-articles-metadata diff --git a/.github/workflows/update-repos-metadata.yml b/.github/workflows/update-repos-metadata.yml new file mode 100644 index 0000000..6cce32b --- /dev/null +++ b/.github/workflows/update-repos-metadata.yml @@ -0,0 +1,27 @@ +# ✍️ Description: +# This action is to update github repos with the latest metadata (stars, forks, etc) from the GitHub API. +# Should be run on a schedule, e.g. every day at 1am and update the metadata for 10 repos at a time +name: Repos Metadata Update + +on: + schedule: + - cron: "0 1 * * *" + workflow_dispatch: + +jobs: + update-repos-metadata: + name: Repos Metadata Update + runs-on: ubuntu-latest + timeout-minutes: 3 + + steps: + - name: 📦 Checkout project repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: 📦 Setupbun + install deps + uses: ./.github/actions/setup-bun-and-install + + - name: 🏃‍♂️ Update Repos metadata + run: bun run update-repos-metadata diff --git a/package.json b/package.json index 29a6a59..9a7dda1 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,11 @@ "dev": "next dev", "lint": "SKIP_ENV_VALIDATION=1 next lint", "start": "next start", - "type-check": "tsc --noEmit" + "type-check": "tsc --noEmit", + "refetch-articles": "bun ./src/scripts/refetch-articles.ts", + "refetch-podcasts-episodes": "bun ./src/scripts/refetch-podcasts-episodes.ts", + "update-articles-metadata": "bun ./src/scripts/update-articles-metadata.ts", + 
"update-repos-metadata": "bun ./src/scripts/update-repos-metadata.ts" }, "dependencies": { "@clerk/nextjs": "^4.26.2", diff --git a/src/scripts/rss-refresher.ts b/src/scripts/refetch-articles.ts similarity index 89% rename from src/scripts/rss-refresher.ts rename to src/scripts/refetch-articles.ts index 260cdd0..e7f0489 100644 --- a/src/scripts/rss-refresher.ts +++ b/src/scripts/refetch-articles.ts @@ -1,8 +1,9 @@ #!/usr/bin/bun await import("../env.mjs"); -import { getXataClient } from "~/xata"; +import { type BlogsRecord, getXataClient } from "~/xata"; import { extractRssFeed } from "~/utils/extract-rss-feed"; +import type { SelectedPick } from "@xata.io/client"; /** * This script is used to fetch the RSS feed from blogs and update the article table in the database. @@ -62,20 +63,10 @@ const addBulkArticles = async (articles: Article[]): Promise => { } }; -const rssRefresher = async () => { - // only select blogs that have an rss feed and already validated by admin - // TODO: we should also check if last_rss_retrieved_at is older than 24h at least - const blog = await getXataClient() - .db.blogs.select(["rss", "title", "id"]) - .filter({ draft: false, rss: { $contains: "http" } }) - .sort("last_rss_retrieved_at", "asc") - .getFirst(); - - if (!blog) { - console.info("🚨 No blog found"); - return; - } - console.log(`✅ Fetching RSS feed for ${blog.title} - ${blog.rss}`); +const fetchRssFeed = async ( + blog: SelectedPick, +) => { + console.log(`\n \n🚀 Fetching RSS feed for ${blog.title} - ${blog.rss} 🚀`); const feed = await extractRssFeed(blog.rss!); if (feed?.entries === undefined) { @@ -118,4 +109,16 @@ const rssRefresher = async () => { } }; +const rssRefresher = async () => { + const blogs = await getXataClient() + .db.blogs.select(["rss", "title", "id"]) + .filter({ draft: false, rss: { $contains: "http" } }) + .sort("last_rss_retrieved_at", "asc") + .getMany(); + + for (const blog of blogs) { + await fetchRssFeed(blog); + } +}; + await 
rssRefresher(); diff --git a/src/scripts/podcast-fetcher.ts b/src/scripts/refetch-podcasts-episodes.ts similarity index 83% rename from src/scripts/podcast-fetcher.ts rename to src/scripts/refetch-podcasts-episodes.ts index 6545440..d2c24e9 100644 --- a/src/scripts/podcast-fetcher.ts +++ b/src/scripts/refetch-podcasts-episodes.ts @@ -1,8 +1,14 @@ #!/usr/bin/bun await import("../env.mjs"); +import type { SelectedPick } from "@xata.io/client"; import { getShowEpisodes } from "~/utils/spotify"; -import { type EpisodesRecord, getXataClient } from "~/xata"; +import { + type EpisodesRecord, + getXataClient, + type PodcastsRecord, +} from "~/xata"; +import { sleep } from "./utils"; const MAX_EPISODE_PER_PODCAST = 20; type OPError = { @@ -51,13 +57,9 @@ const addBulkEpisodes = async (episodes: EpisodesRecord[]): Promise => { } }; -const podcastFetcher = async () => { - const podcast = await getXataClient() - .db.podcasts.select(["spotify_url", "id"]) - .filter({ draft: false }) - .sort("last_rss_retrieved_at", "asc") - .getFirst(); - +const fetchPodcastEpisodes = async ( + podcast: SelectedPick, +) => { if (!podcast || !podcast.spotify_url) { console.log("✅ No podcasts"); return; @@ -68,7 +70,7 @@ const podcastFetcher = async () => { return; } console.log( - `✅ Start refetching podcast episodes for ${podcast.spotify_url}`, + `\n\n🚀 Start refetching podcast episodes for ${podcast.spotify_url} 🚀`, ); const data = await getShowEpisodes(showId); const episodes: Partial[] = data @@ -104,4 +106,17 @@ const podcastFetcher = async () => { } }; +const podcastFetcher = async () => { + const podcasts = await getXataClient() + .db.podcasts.select(["spotify_url", "id"]) + .filter({ draft: false }) + .sort("last_rss_retrieved_at", "asc") + .getMany(); + + for (const pod of podcasts) { + await fetchPodcastEpisodes(pod); + await sleep(3000); // Make sure to not exceed the rate limit for the spotify API + } +}; + await podcastFetcher(); diff --git 
a/src/scripts/update-articles-metadata.ts b/src/scripts/update-articles-metadata.ts index abf1c18..87afed3 100644 --- a/src/scripts/update-articles-metadata.ts +++ b/src/scripts/update-articles-metadata.ts @@ -8,7 +8,7 @@ import { type Operation, logResults } from "./utils"; /** * This script will go throw articles with missed metadata and try to extract the metadata from the article url and update the article table in the database. */ -const MAX_ARTICLES_PER_EXECUTION = 5; +const MAX_ARTICLES_PER_EXECUTION = 10; type Article = { id: string; diff --git a/src/scripts/utils.ts b/src/scripts/utils.ts index 31418d2..1bd49c4 100644 --- a/src/scripts/utils.ts +++ b/src/scripts/utils.ts @@ -9,3 +9,6 @@ export const logResults = (operations: Operation[]) => { console.log(`${operation.isSuccess ? "✅" : "❌"} : ${operation.label}`); }); }; + +export const sleep = (ms: number) => + new Promise((resolve) => setTimeout(resolve, ms));