-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.tsx
79 lines (71 loc) · 2.31 KB
/
scrape.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/** @jsx h */
import { h, renderToString } from "./xml-jsx.ts";
import { load } from "https://esm.sh/v111/cheerio@1.0.0-rc.12";
import { ElementType } from "https://esm.sh/v111/domelementtype@2.3.0";
// Type guard used to filter absent values out of a list.
// The loose `!= null` comparison rejects both null and undefined, so the runtime
// check matches the NonNullable<I> that the predicate promises (a strict
// `!== null` would let undefined through while still claiming it is absent).
// The trailing comma in `<I,>` keeps the generic from being parsed as a JSX tag.
const isNotNull = <I,>(i: I | null): i is NonNullable<I> => i != null;
/** Selector strings that tell feedFromUrl where to find feed items in a page. */
export type Selectors = {
/** Selects the elements that each contain one feed item. */
items: string;
/** Selects, within an item element, the element whose text is the item's title. */
title: string;
};
/**
 * Fetch a web page and render the items scraped from it as an RSS 2.0 feed.
 *
 * - `selectors.items` should select elements that each contain an item.
 * - `selectors.title` should select an element within that item containing the
 *   title text of the item.
 * - The item element should either be an `<a>` tag itself or contain an `<a>`
 *   tag with an `href` attribute that is the link of the item.
 *
 * Items without a non-blank title or without a usable link are reported with
 * `console.error` and left out of the feed.
 *
 * @param url Address of the page to scrape; also used as the channel link.
 * @param selectors Selectors locating the items and their titles.
 * @returns A response mirroring the upstream status and status text when the
 *   fetch is not OK, a 404 "No items found" response when nothing could be
 *   scraped, and otherwise the feed as `application/xml`.
 */
export async function feedFromUrl(
  url: string,
  selectors: Selectors,
): Promise<Response> {
  const res = await fetch(url);
  if (!res.ok) {
    // The body is not needed; cancel it so the unread stream is released.
    await res.body?.cancel();
    return new Response(res.statusText, { status: res.status });
  }

  // Relative links must be resolved against the URL that was actually served,
  // which differs from the requested one after a redirect. Fall back to the
  // requested URL when the response does not report one.
  const baseUrl = res.url || url;

  const text = await res.text();
  const $ = load(text);

  const items = $(selectors.items).map((_, item) => {
    const $item = $(item);

    // Trim before checking so a whitespace-only title counts as missing.
    const title = $item.find(selectors.title).text().trim();
    if (!title) {
      console.error("Couldn't find the title element");
      return null;
    }

    // Allow the item itself to be the link; otherwise look for a descendant
    // anchor that actually carries an href.
    const $link = item.type === ElementType.Tag && item.tagName === "a"
      ? $item
      : $item.find("a[href]");
    const href = $link.attr("href");
    if (!href) {
      console.error("Couldn't find the link element");
      return null;
    }

    try {
      // Resolve the potentially relative URL into an absolute URL
      return { title, link: new URL(href, baseUrl).toString() };
    } catch {
      // A malformed href should only cost this one item, not the whole feed.
      console.error(`Couldn't resolve the link "${href}"`);
      return null;
    }
  }).toArray().filter(isNotNull);

  if (items.length === 0) {
    return new Response("No items found", {
      status: 404,
    });
  }

  // Use the page title for the channel, falling back to the URL when the page
  // has no (or a blank) <title>.
  const channelTitle = $("head > title").text().trim() || url;

  const xml = '<?xml version="1.0" encoding="UTF-8" ?>' +
    await renderToString(
      <rss version="2.0">
        <channel>
          <title>{channelTitle}</title>
          <description>Scraped RSS feed for {url}</description>
          <link>{url}</link>
          {items.map((item) => (
            <item>
              <title>{item.title}</title>
              <link>{item.link}</link>
              <guid>{item.link}</guid>
            </item>
          ))}
        </channel>
      </rss>,
    );

  return new Response(xml, {
    headers: { "content-type": "application/xml;charset=utf-8" },
  });
}