-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.js
146 lines (114 loc) · 4.18 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
const puppeteer = require('puppeteer');
const firebase = require('firebase');
const fs = require('fs');
const declarations = require('./declarations');
const firebaseFunctions = require('./firebase')
// Load the Firebase project configuration and the job-site login credentials
// from JSON files. The relative paths are resolved against the process's
// current working directory, so the script must be started from the folder
// that contains both files.
const config = JSON.parse(fs.readFileSync('./firebaseConfig.json'));
const credentials = JSON.parse(fs.readFileSync('./credentials.json'));
// The Firebase app has to be initialised before the Firestore handle is requested.
firebase.initializeApp(config);
const firestore = firebase.firestore();
// NOTE(review): `timestampsInSnapshots` is presumably only needed by older
// firebase SDK versions - confirm against the installed version.
const settings = { timestampsInSnapshots: true};
firestore.settings(settings);
/**
 * Returns a promise that settles after a pause, so callers can `await` it
 * to sleep between steps.
 *
 * @param {number} num - Time to wait, in milliseconds.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
function delay(num) {
    return new Promise((resolve) => {
        // The timer invokes `resolve` without arguments, so the promise
        // fulfils with `undefined`.
        setTimeout(resolve, num);
    });
}
/**
 * Plain record describing one scraped job posting.
 */
class Job {
    /**
     * @param {string} title - Job title.
     * @param {string} salary - Salary text as shown on the job page.
     * @param {string} company - Company name.
     * @param {string} url - Link to the job page.
     * @param {string} imgURL - Link to the company logo image.
     */
    constructor(title, salary, company, url, imgURL) {
        // Property order (title, company, salary, url, imgURL) is kept on
        // purpose so enumeration of a Job yields the same key sequence.
        Object.assign(this, { title, company, salary, url, imgURL });
    }
}
/**
 * Waits for the first element matching `selector` and returns its
 * `textContent`.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string} selector - CSS selector of the element to read.
 * @returns {Promise<string>} The element's raw (untrimmed) text content.
 */
async function readText(page, selector) {
    const handle = await page.waitForSelector(selector);
    const property = await handle.getProperty('textContent');
    return property.jsonValue();
}

/**
 * Opens one job page in its own tab and extracts the job details.
 *
 * @param {object} browser - Running Puppeteer browser instance.
 * @param {string} url - Absolute URL of the job page.
 * @returns {Promise<Job|null>} The scraped job, or `null` when the salary is
 *   confidential or the page could not be scraped.
 */
async function scrapeJob(browser, url) {
    const page = await browser.newPage();
    try {
        await page.goto(url, {
            timeout: 3000000
        });
        // salary fetching
        const salary = await readText(page, '.job-page .job-summary .salary-info dd');
        // Postings that hide their salary are skipped.
        if (salary.includes('Confidential')) {
            return null;
        }
        // job title
        const title = await readText(page, 'h1.job-title');
        // company name, with embedded line breaks removed
        const companyRaw = await readText(page, 'p.job-subinfo');
        const company = companyRaw.replace(/(\r\n\t|\n|\r\t)/gm, "");
        // logo url
        const logo = await page.waitForSelector('a.company-logo img');
        const imgURL = await (await logo.getProperty('src')).jsonValue();
        return new Job(title.trim(), salary.trim(), company.trim(), url, imgURL);
    } catch (err) {
        // One broken job page must not abort the whole crawl.
        console.error(`Skipping ${url}: ${err.message}`);
        return null;
    } finally {
        // Always release the tab, otherwise every visited job keeps a page open.
        await page.close();
    }
}

// Entry point: wipes the stored jobs, signs in, collects job links, scrapes
// each job page and uploads the jobs that publish a salary.
(async () => {
    // awaiting to delete all docs stored in db first
    // NOTE(review): the collection is emptied before crawling, so a failed
    // crawl leaves the database empty - confirm this is intended.
    await firebaseFunctions.deleteAll();
    // launching the chrome browser
    const browser = await puppeteer.launch({ headless: true });
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1280, height: 800 });
        await page.goto('https://wuzzuf.net/explore');
        // signing into the website
        const email = await page.waitForSelector('#input-signin-email');
        const password = await page.waitForSelector("#input-signin-password");
        const signbtn = await page.waitForSelector('.btn-signin');
        await email.type(credentials.email);
        await password.type(credentials.password);
        await signbtn.click();
        // waiting for meaningful data of jobs to appear
        await page.waitForSelector('.css-p0kzya');
        // scroll down step by step so the page loads more jobs
        for (let i = 0; i < declarations.settings.ScrollLimit; i++) {
            await page.evaluate(() => {
                window.scrollBy(0, window.innerHeight);
            });
            await delay(1500);
        }
        // fetch all links to jobs in the page
        const hrefs = await page.evaluate(() => {
            const anchors = document.querySelectorAll('a.css-1lwywsz');
            return Array.from(anchors, (a) => a.href);
        });
        console.log(hrefs);
        // fetch salaries and other data of each job fetched above; results
        // keep the order of `hrefs`, skipped jobs come back as null
        const scraped = await Promise.all(hrefs.map((url) => scrapeJob(browser, url)));
        const Jobs = scraped.filter((job) => job !== null);
        savingData(Jobs);
        // pushing newest jobs to firebase as plain objects
        await Promise.all(Jobs.map(({ salary, title, company, url, imgURL }) =>
            firestore.collection("Jobs").add({ salary, title, company, url, imgURL })));
        console.log("FINISHED CRAWLING AND UPDATED DATABASE");
    } finally {
        // closing the browser even when a step above failed
        await browser.close();
    }
})().catch((err) => {
    // Surface failures instead of leaving an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
});
/**
 * Placeholder persistence hook: currently it only prints the URL of every
 * job to the console.
 *
 * @param {Array<{url: string}>} Data - Jobs to "save".
 * @returns {undefined}
 */
function savingData(Data) {
    // just consoling the data.
    const printUrl = ({ url }) => {
        console.log(url);
    };
    Data.forEach(printUrl);
    // TODO: implement your own way of saving the jobs, maybe send it to a firebase repo like i do or
    // send it to your own website as an rest api or to a database
}