# Scheduled Crawl Action #1
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow automatically crawls the specified URL on a schedule (or on
# manual dispatch) and commits the results to your repository.
name: Scheduled Crawl Action

on:
  schedule:
    # Replace with the cron expression for the schedule you want to use
    # (e.g., '0 0 * * *' for daily at midnight UTC).
    - cron: '0 0 * * *'
  # Allow manual triggering.
  workflow_dispatch:

jobs:
  crawl:
    runs-on: ubuntu-latest
    permissions:
      # Write access to repository contents; presumably required so the action
      # can commit the crawl output.
      contents: write
      # NOTE(review): id-token: write lets the job request an OIDC token.
      # Nothing in this workflow visibly uses one; confirm the action needs it
      # and drop it otherwise.
      id-token: write
      actions: read
    steps:
      - uses: actions/checkout@v4
      - name: Run Firecrawl Action
        # Replace with the published action name,
        # e.g., cameronking4/firecrawl-cron@v1.
        uses: ./
        with:
          # Replace with the URL you want to crawl regularly.
          url: 'https://news.ycombinator.com'
          # Replace with the folder name where the output commits will be
          # saved.
          output_folder: 'knowledge_bases'
          # Replace with the API URL of your Firecrawl API endpoint; this is
          # the default URL for the starter app.
          api_url: 'https://nextjs-firecrawl-starter.vercel.app'