# action.yml - metadata for the "Firecrawl Scheduled Action" composite action.
# Action name and human-readable summary.
name: Firecrawl Scheduled Action
description: >-
  Automatically crawls a specified URL on a schedule and commits the
  results to your repository.
# Parameters supplied by the calling workflow.
inputs:
  # Page to hand to the crawl API.
  url:
    description: 'URL to crawl'
    required: true
  # Directory (relative to the repository root) that receives the markdown.
  output_folder:
    description: 'Folder name where crawled content will be saved'
    # A default is provided, so callers may omit this input.
    required: false
    default: 'knowledge_bases'
  # Base URL of the crawl service; `/api/crawl` is appended by the steps.
  api_url:
    description: 'API endpoint for crawling (e.g., https://nextjs-firecrawl-starter.vercel.app)'
    required: true
# Composite action: checks out the repository, submits a crawl job to the
# API, polls until the job finishes, writes the markdown of the first `data`
# entry into `output_folder`, then commits and pushes.
#
# Inputs and step outputs reach the scripts through `env:` instead of being
# interpolated with `${{ }}` inside `run:`, so their values are never parsed
# as shell code.
runs:
  using: 'composite'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Create output directory if not exists
      shell: bash
      env:
        OUTPUT_FOLDER: ${{ inputs.output_folder }}
      run: mkdir -p -- "$OUTPUT_FOLDER"

    - name: Crawl URL
      id: crawl
      shell: bash
      env:
        CRAWL_URL: ${{ inputs.url }}
        API_URL: ${{ inputs.api_url }}
      run: |
        # Let jq build the request body so the URL is always JSON-escaped.
        payload=$(jq -cn --arg url "$CRAWL_URL" '{url: $url}')
        # -sS: hide the progress meter but still report transport errors.
        # ${API_URL%/} drops one trailing slash to avoid "//api/crawl".
        response=$(curl -sS -X POST -H "Content-Type: application/json" \
          -d "$payload" \
          "${API_URL%/}/api/crawl")
        if ! printf '%s' "$response" | jq . >/dev/null 2>&1; then
          echo "Error: Invalid JSON response from crawl API"
          exit 1
        fi
        crawl_id=$(printf '%s' "$response" | jq -r '.id')
        if [ "$crawl_id" = "null" ] || [ -z "$crawl_id" ]; then
          echo "Error: No crawl ID received"
          exit 1
        fi
        # Expose the job id to later steps as steps.crawl.outputs.crawl_id.
        echo "crawl_id=$crawl_id" >> "$GITHUB_OUTPUT"

    - name: Wait for crawl completion
      id: check_status
      shell: bash
      env:
        API_URL: ${{ inputs.api_url }}
        CRAWL_ID: ${{ steps.crawl.outputs.crawl_id }}
      run: |
        # Poll every 5 seconds. Give up after CRAWL_MAX_ATTEMPTS polls
        # (default 720, roughly one hour) instead of looping forever.
        max_attempts="${CRAWL_MAX_ATTEMPTS:-720}"
        attempt=0
        while [ "$attempt" -lt "$max_attempts" ]; do
          attempt=$((attempt + 1))
          status_response=$(curl -sS "${API_URL%/}/api/crawl/status/${CRAWL_ID}")
          if ! printf '%s' "$status_response" | jq . >/dev/null 2>&1; then
            echo "Error: Invalid JSON response from status API"
            exit 1
          fi
          status=$(printf '%s' "$status_response" | jq -r '.status')
          if [ "$status" = "completed" ]; then
            # `// empty` yields an empty string instead of the text "null"
            # when the response carries no markdown.
            markdown=$(printf '%s' "$status_response" | jq -r '.data[0].markdown // empty')
            if [ -z "$markdown" ]; then
              echo "Error: Crawl completed but returned no markdown"
              exit 1
            fi
            # printf, not echo: content starting with "-n"/"-e" stays intact.
            printf '%s\n' "$markdown" > /tmp/crawl_result.md
            exit 0
          elif [ "$status" = "failed" ]; then
            echo "Error: Crawl failed"
            exit 1
          fi
          sleep 5
        done
        echo "Error: Crawl did not complete after ${max_attempts} status checks"
        exit 1

    - name: Generate filename from URL
      id: filename
      shell: bash
      env:
        CRAWL_URL: ${{ inputs.url }}
        OUTPUT_FOLDER: ${{ inputs.output_folder }}
      run: |
        # Replace every non-alphanumeric character with "-" and lowercase it.
        filename=$(printf '%s\n' "$CRAWL_URL" | sed 's/[^a-zA-Z0-9]/-/g' | tr '[:upper:]' '[:lower:]')
        timestamp=$(date +%Y%m%d-%H%M%S)
        echo "filename=${OUTPUT_FOLDER}/${filename}-${timestamp}.md" >> "$GITHUB_OUTPUT"

    - name: Save markdown to file
      shell: bash
      env:
        TARGET_FILE: ${{ steps.filename.outputs.filename }}
      run: cp -- /tmp/crawl_result.md "$TARGET_FILE"

    - name: Commit and push if there are changes
      shell: bash
      env:
        CRAWL_URL: ${{ inputs.url }}
        OUTPUT_FOLDER: ${{ inputs.output_folder }}
      run: |
        git config --local user.email "action@github.com"
        git config --local user.name "GitHub Action"
        git add -- "${OUTPUT_FOLDER}/"
        # Commit only when something is staged, so that a genuine commit
        # failure stops the step instead of being reported as "no changes".
        if git diff --cached --quiet; then
          echo "No changes to commit"
        else
          git commit -m "Add crawled documentation for ${CRAWL_URL}"
        fi
        git push
# Icon and colour used for the action's badge.
branding:
  icon: globe
  color: blue