This repository has been archived by the owner on Aug 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
98 lines (72 loc) · 2.05 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
package parser
import (
"errors"
"fmt"
"net/http"
"time"
"github.com/PuerkitoBio/goquery"
)
// ParseLink fetches p.URL over HTTP and returns the metadata generated from
// the HTML document in the response.
//
// The parameters are first passed through setDefaultParameters. p.Timeout is
// interpreted as a number of seconds for the HTTP client timeout, p.Headers
// are set on the outgoing request, and redirects are followed only when
// p.AllowRedirect is true.
//
// A non-nil error is returned only when ValidateLink rejects p.URL. Every
// later failure (building the request, performing it, a status code other than
// 200, or parsing the HTML) is handed to returnResultWithError and the
// resulting MetaData is returned together with a nil error.
func ParseLink(p Parameters) (MetaData, error) {
	// Get and set the default parameters.
	p = setDefaultParameters(p)

	// Validate the URL up front. This is library code, so an invalid URL is
	// reported to the caller through the error return rather than a panic.
	domain, err := ValidateLink(p.URL)
	if err != nil {
		var zero MetaData
		return zero, fmt.Errorf("validating URL %q: %w", p.URL, err)
	}

	// Build the HTTP client with the requested timeout and redirect policy.
	client := &http.Client{
		Timeout: time.Second * time.Duration(p.Timeout),
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			if p.AllowRedirect {
				return nil
			}
			return errors.New("redirect not allowed. Pass `AllowRedirect: true` if you want to allow redirects")
		},
	}

	// Set the URL and domain to parse.
	linkData := NewLink(p.URL, domain)
	start := time.Now()
	metaData := NewMetaData()
	log("✅ Valid URL provided.")
	log("✅ Generated metadata template.")

	// Build the request. The error must be checked before req is touched:
	// req is nil when NewRequest fails.
	req, err := http.NewRequest(http.MethodGet, p.URL, http.NoBody)
	if err != nil {
		return returnResultWithError(err.Error(), metaData, linkData), nil
	}

	// Add the caller-supplied headers.
	for k, v := range p.Headers {
		req.Header.Set(k, v)
	}

	// Perform the request.
	resp, err := client.Do(req)
	if err != nil {
		return returnResultWithError(err.Error(), metaData, linkData), nil
	}
	// Close the body once the function is done with the response.
	defer resp.Body.Close()

	// Only a plain 200 response is treated as success. The status text is
	// passed through verbatim (never used as a format string).
	if resp.StatusCode != http.StatusOK {
		return returnResultWithError(resp.Status, metaData, linkData), nil
	}

	// Parse the HTML document.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return returnResultWithError(err.Error(), metaData, linkData), nil
	}

	// Update the metadata from the parsed document.
	log("⏳ Updating metadata from html document...")
	metaData.generateMetaData(doc, linkData)
	log("✅ Updated metadata from html document.")

	elapsed := time.Since(start)
	log(fmt.Sprintf("⏱ Total time taken: %d milliseconds.", elapsed.Milliseconds()))

	return *metaData, nil
}