-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathopenapi.yaml
307 lines (301 loc) · 8.45 KB
/
openapi.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# OpenAPI 3.1 description of the Firecrawl API: document version, API
# metadata, and the base URL that every path below is resolved against.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation; confirm against the upstream file.
openapi: 3.1.0
info:
  title: Firecrawl API
  version: '1.0.0'
  description: API for crawling, mapping, and scraping web content using Firecrawl
servers:
  - url: https://nextjs-firecrawl-starter.vercel.app/api
    description: Next.js API routes
# Reusable schemas, referenced elsewhere as '#/components/schemas/<Name>'.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation; places where more than one nesting was plausible are marked.
components:
  schemas:
    # Error payload: a single human-readable message.
    Error:
      type: object
      properties:
        error:
          type: string
          description: Error message

    # Request body for starting a crawl job; only `url` is mandatory.
    CrawlRequest:
      type: object
      required:
        - url
      properties:
        url:
          type: string
          description: URL to crawl
        limit:
          type: integer
          description: Maximum number of pages to crawl
          default: 50
        allowBackwardLinks:
          type: boolean
          description: Whether to allow crawling backward links
          default: false
        scrapeOptions:
          type: object
          properties:
            formats:
              type: array
              items:
                type: string
              description: Content formats to extract
            # NOTE(review): placed under scrapeOptions; it could instead be a
            # sibling of scrapeOptions - confirm against the upstream file.
            extract:
              type: object
              properties:
                schema:
                  type: object
                  description: Schema for content extraction
                systemPrompt:
                  type: string
                  description: System prompt for extraction
                prompt:
                  type: string
                  description: User prompt for extraction

    # Acknowledgement of a started crawl, carrying the job ID.
    CrawlResponse:
      type: object
      properties:
        success:
          type: boolean
        id:
          type: string
          description: Crawl job ID

    # Progress/result report for a crawl job.
    CrawlStatus:
      type: object
      properties:
        status:
          type: string
          enum: [scraping, processing, completed, failed]
        total:
          type: integer
          description: Total number of pages
        completed:
          type: integer
          description: Number of completed pages
        data:
          type: array
          description: Crawled data (when completed)
          items:
            type: object
            description: Crawled page data
        next:
          type: string
          description: Next page token
        error:
          type: string
          description: Error message (when failed)

    # Request body for mapping a site; only `url` is mandatory.
    MapRequest:
      type: object
      required:
        - url
      properties:
        url:
          type: string
          description: URL to map
        search:
          type: string
          description: Optional search query

    # Request body for scraping a single page; only `url` is mandatory.
    ScrapeRequest:
      type: object
      required:
        - url
      properties:
        url:
          type: string
          description: URL to scrape
        formats:
          type: array
          items:
            type: string
          default: ['markdown', 'html']
          description: Content formats to extract
        extract:
          type: object
          properties:
            schema:
              type: object
              description: Schema for content extraction
            systemPrompt:
              type: string
              description: System prompt for extraction
            prompt:
              type: string
              description: User prompt for extraction
        # Each action must name its `type`; the other fields are optional.
        actions:
          type: array
          items:
            type: object
            required:
              - type
            properties:
              type:
                type: string
                description: Type of action
              milliseconds:
                type: integer
                description: Delay in milliseconds
              selector:
                type: string
                description: CSS selector for element
              text:
                type: string
                description: Text to input
              key:
                type: string
                description: Keyboard key to press
        location:
          type: object
          properties:
            country:
              type: string
              description: Country code
            languages:
              type: array
              items:
                type: string
              description: Preferred languages
# API operations. Request and error bodies reference the shared schemas
# under '#/components/schemas'; the /map and /scrape success bodies are
# defined inline.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation; places where more than one nesting was plausible are marked.
paths:
  # Start a crawl job; the 200 response carries the job ID.
  /crawl:
    post:
      operationId: startCrawl
      summary: Start a new crawl job
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CrawlRequest'
      responses:
        '200':
          description: Crawl job started successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlResponse'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'

  # Fetch the status of a crawl job identified by the `id` path parameter.
  /crawl/status/{id}:
    get:
      operationId: getCrawlStatus
      summary: Get crawl job status
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: string
          # NOTE(review): placed on the parameter; it could instead belong to
          # the schema above - confirm against the upstream file.
          description: Crawl job ID
      responses:
        '200':
          description: Crawl status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CrawlStatus'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'

  # Map a website; the 200 response lists pages and their outgoing links.
  /map:
    post:
      operationId: mapWebsite
      summary: Map a website structure
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MapRequest'
      responses:
        '200':
          description: Website mapped successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  success:
                    type: boolean
                    description: Whether the mapping was successful
                  nodes:
                    type: array
                    description: List of mapped URLs and their relationships
                    items:
                      type: object
                      properties:
                        url:
                          type: string
                          description: URL of the page
                        links:
                          type: array
                          description: Outgoing links from this page
                          items:
                            type: string
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'

  # Scrape a single page and return its content.
  /scrape:
    post:
      operationId: scrapePage
      summary: Scrape a webpage
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ScrapeRequest'
      responses:
        '200':
          description: Page scraped successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  success:
                    type: boolean
                    description: Whether the scraping was successful
                  content:
                    type: object
                    description: Scraped content in requested formats
                    properties:
                      markdown:
                        type: string
                        description: Content in Markdown format
                      html:
                        type: string
                        description: Content in HTML format
                      # NOTE(review): placed inside `content`; it could
                      # instead be a sibling of `content` - confirm against
                      # the upstream file.
                      extracted:
                        type: object
                        description: Extracted data based on provided schema/prompts
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'