// scrape.js
// Address of the Wikipedia "List of rowing clubs" page. Nothing in the code
// visible here reads this constant — presumably it records which page the
// script is meant to be run against (TODO confirm).
// NOTE(review): at top level this binding shadows the built-in `URL` constructor.
const URL = "https://en.wikipedia.org/wiki/List_of_rowing_clubs";
/**
 * Reads the text of the second cell of a table row.
 *
 * @param {Element} node - Row to read from (anything exposing `querySelector`).
 * @returns {string | undefined} `innerText` of the row's second `<td>`, or
 *   `undefined` when the row has no such cell.
 */
// Optional chaining keeps a row without a second <td> from throwing, so the
// caller can discard it by checking for `undefined`.
const name = (node) => node.querySelector("td:nth-child(2)")?.innerText;
/**
 * Reads the link target found in the second cell of a table row.
 *
 * @param {Element} node - Row to read from (anything exposing `querySelector`).
 * @returns {string | undefined} `href` of the `<a>` that is a direct child of
 *   the second `<td>`, or `undefined` when no such anchor exists.
 */
const wikiUrl = (node) => {
  const anchor = node.querySelector("td:nth-child(2) > a");
  return anchor?.href;
};
/**
 * Derives an image URL from the thumbnail found in the first cell of a row.
 *
 * The thumbnail's `src` has its first "thumb/" removed and its trailing
 * "/<width>px-<file>" path segment dropped. A `src` that contains neither is
 * returned unchanged.
 *
 * @param {Element} node - Row to read from (anything exposing `querySelector`).
 * @returns {string | undefined} The rewritten URL, or `undefined` when the
 *   first `<td>` holds no `a > img`.
 */
const imageUrl = (node) =>
  node
    .querySelector("td:nth-child(1) > a > img")
    ?.src.replace("thumb/", "")
    // Accept any pixel width, not only 170px, and anchor on the final path
    // segment so a "<n>px-" earlier in the path is left alone.
    .replace(/\/\d+px-[^/]*$/, "");
/**
 * Reads the text of the third cell of a table row.
 *
 * @param {Element} node - Row to read from (anything exposing `querySelector`).
 * @returns {string | undefined} `innerText` of the row's third `<td>`, or
 *   `undefined` when the row has no such cell.
 */
const description = (node) => {
  const cell = node.querySelector("td:nth-child(3)");
  return cell?.innerText;
};
// Build one record per row of the selected wikitable, log every record, and
// keep only those whose name, image URL and description are all defined.
const clubs = Array.from(
  document.querySelectorAll("table.wikitable:nth-child(25) tr")
)
  .slice(1) // First row is the table header
  .map((row) => {
    return {
      name: name(row),
      imageUrl: imageUrl(row),
      wikiUrl: wikiUrl(row),
      description: description(row),
    };
  })
  .filter((club) => {
    // Every candidate is logged, including the ones rejected below.
    console.log({
      name: club.name,
      imageUrl: club.imageUrl,
      wikiUrl: club.wikiUrl,
      description: club.description,
    });

    // wikiUrl is optional; the other three fields are required.
    const isIncomplete =
      club.name === undefined ||
      club.imageUrl === undefined ||
      club.description === undefined;
    return !isIncomplete;
  });

// Print the final list of retained clubs.
console.log(clubs);