-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.ts
80 lines (75 loc) · 2.49 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import type {
BasicCrawler,
CrawlingContext,
BasicCrawlerOptions,
BasicCrawlingContext,
HttpCrawler,
HttpCrawlingContext,
HttpCrawlerOptions,
InternalHttpCrawlingContext,
JSDOMCrawler,
JSDOMCrawlingContext,
JSDOMCrawlerOptions,
CheerioCrawler,
CheerioCrawlingContext,
CheerioCrawlerOptions,
PlaywrightCrawler,
PlaywrightCrawlingContext,
PlaywrightCrawlerOptions,
PuppeteerCrawler,
PuppeteerCrawlerOptions,
PuppeteerCrawlingContext,
} from 'crawlee';
import type { ArrVal } from '../utils/types';
/**
 * Names of the Crawlee crawler types available in this module.
 *
 * Declared `as const` so that every entry keeps its string-literal type,
 * which lets a union type be derived from this array (see `CrawlerType`).
 */
export const CRAWLER_TYPE = [
  'basic',
  'http',
  'jsdom',
  'cheerio',
  'playwright',
  'puppeteer',
] as const;
/**
 * Crawler type name, derived from the `CRAWLER_TYPE` array so the two cannot
 * drift apart.
 *
 * NOTE(review): relies on the project helper `ArrVal` from `../utils/types`,
 * which presumably yields the union of the array's element types - confirm
 * against its definition.
 */
export type CrawlerType = ArrVal<typeof CRAWLER_TYPE>;
/**
 * Type utility that retrieves the types related to a specific Crawlee crawler.
 *
 * The result is an object type with three members: `crawler` (the crawler
 * class), `context` (the crawling context) and `options` (the crawler's
 * options).
 *
 * E.g. `CrawlerMeta<'jsdom'>` resolves to the types for the JSDOM crawler:
 *
 * ```ts
 * {
 *   crawler: JSDOMCrawler,
 *   context: JSDOMCrawlingContext<TData>,
 *   options: JSDOMCrawlerOptions<TData>
 * }
 * ```
 *
 * Which can then be used like so:
 * ```ts
 * type MyType = CrawlerMeta<'jsdom'>['context'];
 * ```
 *
 * @typeParam T - Crawler type name that selects which branch is returned.
 * @typeParam Ctx - Crawling context. Only the 'http' and 'basic' branches use
 *   it, and only for their `crawler` and `options` members.
 * @typeParam TData - Passed to the `context` member of every branch, and to
 *   the `options` member of the 'jsdom' and 'cheerio' branches.
 *
 * The 'http' branch resolves to `never` unless `Ctx` extends
 * `InternalHttpCrawlingContext`.
 * NOTE(review): whether the default `Ctx` (`CrawlingContext`) satisfies that
 * check depends on Crawlee's type definitions - confirm that
 * `CrawlerMeta<'http'>` without an explicit `Ctx` does not collapse to `never`.
 *
 * The trailing `never` is the fallback for a `T` that matches none of the
 * listed crawler names.
 */
export type CrawlerMeta<
T extends CrawlerType,
Ctx extends CrawlingContext = CrawlingContext,
TData extends Record<string, any> = Record<string, any>
> = T extends 'http'
? Ctx extends InternalHttpCrawlingContext
? { crawler: HttpCrawler<Ctx>, context: HttpCrawlingContext<TData>, options: HttpCrawlerOptions<Ctx> } // prettier-ignore
: never
: T extends 'jsdom'
? { crawler: JSDOMCrawler, context: JSDOMCrawlingContext<TData>, options: JSDOMCrawlerOptions<TData> } // prettier-ignore
: T extends 'cheerio'
? { crawler: CheerioCrawler, context: CheerioCrawlingContext<TData>, options: CheerioCrawlerOptions<TData> } // prettier-ignore
: T extends 'playwright'
? { crawler: PlaywrightCrawler, context: PlaywrightCrawlingContext<TData>, options: PlaywrightCrawlerOptions } // prettier-ignore
: T extends 'puppeteer'
? { crawler: PuppeteerCrawler, context: PuppeteerCrawlingContext<TData>, options: PuppeteerCrawlerOptions } // prettier-ignore
: T extends 'basic'
? Ctx extends CrawlingContext
? { crawler: BasicCrawler<Ctx>, context: BasicCrawlingContext<TData>, options: BasicCrawlerOptions<Ctx> } // prettier-ignore
: never
: never;
/**
 * Type of the `run` method of a `BasicCrawler` that is parametrised by the
 * crawling context `TCtx`.
 */
type OrigRunCrawler<TCtx extends CrawlingContext<any, any>> =
  BasicCrawler<TCtx>['run'];
/**
 * First parameter of the crawler's `run` method, with `null` / `undefined`
 * removed so that it can be indexed.
 */
type CrawlerRunInput = NonNullable<Parameters<OrigRunCrawler<any>>[0]>;

/** URL string or object passed to Crawler.run (a single entry of its first argument). */
export type CrawlerUrl = CrawlerRunInput[0];