INPUT_SCHEMA.json
{
"title": "Provide Craigslist Query",
"description": "To filter out irrelevant results, limit the scope of your search by providing the list of locations, categories, and search terms you are interested in.",
"type": "object",
"schemaVersion": 1,
"properties": {
"site": {
"sectionCaption": "Craigslist Search Sites",
"title": "Regional Sites",
"type": "array",
"prefill": ["auburn", "dallas, 20"],
"description": "The list of sub domains you would like to search in the form 'site, search radius (in miles)'",
"editor": "stringList"
},
"geoLocation": {
"title": "GeoLocations",
"type": "array",
"prefill": ["29.049567, -98.434871, 20"],
"description": "The list of geolocations you would like to search in the form 'latitude, longitude, search radius (in miles)'",
"editor": "stringList"
},
"zipCode": {
"title": "Zip Code",
"type": "array",
"prefill": ["24018, 10"],
"description": "The list of Zip Codes you would like to search in the form 'Code, search radius (in miles)'",
"editor": "stringList"
},
"category": {
"sectionCaption": "Craigslist Search Categories and Terms",
"title": "Categories",
"type": "array",
"prefill": ["ggg", "rrr"],
"description": "The list of categories you would like to search",
"editor": "stringList"
},
"query": {
"title": "Query - Search Terms",
"type": "array",
"prefill": ["software & engineer", "software & developer", "programmer"],
"description": "The list of Search Terms you would like to filter by",
"editor": "stringList"
},
"healthcheck": {
"title": "HealthChecks.io Endpoint",
"type": "string",
"description": "Include a healthchecks.io url endpoint to fetch at the beginning of this run",
"editor": "textfield"
},
"externalAPI": {
"title": "External API",
"type": "string",
"description": "Include a url for API endpoint to forward the results of this run",
"editor": "textfield"
},
"proxyConfiguration": {
"sectionCaption": "Proxy and browser configuration",
"title": "Proxy configuration",
"type": "object",
"description": "Choose to use no proxy, Apify Proxy, or provide custom proxy URLs.",
"prefill": { "useApifyProxy": true },
"default": { "useApifyProxy": false },
"editor": "proxy"
},
"proxyRotation": {
"title": "Proxy rotation",
"type": "string",
"description": "This property indicates the strategy of proxy rotation and can only be used in conjunction with Apify Proxy. The recommended setting automatically picks the best proxies from your available pool and rotates them evenly, discarding proxies that become blocked or unresponsive. If this strategy does not work for you for any reason, you may configure the scraper to either use a new proxy for each request, or to use one proxy as long as possible, until the proxy fails. IMPORTANT: This setting will only use your available Apify Proxy pool, so if you don't have enough proxies for a given task, no rotation setting will produce satisfactory results.",
"default": "RECOMMENDED",
"editor": "select",
"enum": ["RECOMMENDED", "PER_REQUEST", "UNTIL_FAILURE"],
"enumTitles": [
"Use recommended settings",
"Rotate proxy after each request",
"Use one proxy until failure"
]
},
"maxRequestRetries": {
"sectionCaption": "Performance and limits",
"title": "Max request retries",
"type": "integer",
"description": "Maximum number of times the request for the page will be retried in case of an error. Setting it to 0 means that the request will be attempted once and will not be retried if it fails.",
"minimum": 0,
"default": 3,
"unit": "retries"
},
"maxPagesPerCrawl": {
"title": "Max pages per run",
"type": "integer",
"description": "Maximum number of pages that the scraper will open. 0 means unlimited.",
"minimum": 0,
"default": 10,
"unit": "pages"
},
"maxConcurrency": {
"title": "Max concurrency",
"type": "integer",
"description": "Defines how many pages can be processed by the scraper in parallel. The scraper automatically increases and decreases concurrency based on available system resources. Use this option to set a hard limit.",
"minimum": 1,
"default": 50
}
}
}
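
For reference, here is a minimal sketch of an input object that this schema would accept. The values are drawn from the prefills above and are purely illustrative; any real run would substitute its own sites, ZIP codes, categories, and search terms:

{
  "site": ["dallas, 20"],
  "zipCode": ["24018, 10"],
  "category": ["ggg"],
  "query": ["programmer"],
  "proxyConfiguration": { "useApifyProxy": true },
  "proxyRotation": "RECOMMENDED",
  "maxRequestRetries": 3,
  "maxPagesPerCrawl": 10,
  "maxConcurrency": 50
}

Fields omitted here (geoLocation, healthcheck, externalAPI) are optional and fall back to the defaults declared in the schema.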
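Where a custom proxy is preferred over Apify Proxy, the proxy editor typically accepts a list of explicit proxy URLs instead. Note that the "proxyUrls" field name and the URL below are assumptions based on Apify's standard proxy input format, not something this schema declares:

{
  "proxyConfiguration": {
    "useApifyProxy": false,
    "proxyUrls": ["http://user:pass@proxy.example.com:8000"]
  }
}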