-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsite.go
159 lines (137 loc) · 3.69 KB
/
site.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
package main
import (
"net/url"
log "github.com/sirupsen/logrus"
)
// Sites maps a parsed URL, used by value as the map key, to the Site
// stored for that URL.
type Sites map[url.URL]*Site
// Site is a holding structure consisting of a URL, a pointer to the
// home page and a map of all child pages.
type Site struct {
// Embedded URL of the site; HomePage uses its value as the key under
// which the home page is stored in Pages.
*url.URL
// Embedded home page; NewSite leaves it nil until a page is assigned.
*Page // Home page
// Pages holds the pages created for this site, keyed by URL value.
Pages map[url.URL]*Page
}
// NewSite returns a Site for u that has no home page yet and an empty,
// ready-to-use page map.
func NewSite(u *url.URL) *Site {
	return &Site{
		URL:   u,
		Pages: make(map[url.URL]*Page),
	}
}
// AddSite creates a Site for u with NewSite and stores it in the
// package-level sites map under the URL's value, replacing any entry
// already stored for that URL. It returns the new Site.
//
// u is dereferenced to build the map key, so it must not be nil.
func AddSite(u *url.URL) (s *Site) {
	s = NewSite(u)
	if s == nil {
		return s
	}
	sites[*u] = s
	return s
}
// GetSite parses urlstr and returns the Site stored in the package-level
// sites map for the resulting URL, creating and storing a new Site when
// none exists yet. A URL that parses without a scheme is given the
// "http" scheme and re-parsed.
//
// It returns nil when urlstr (or its http-prefixed form) cannot be
// parsed; the parse error is logged.
func GetSite(urlstr string) (s *Site) {
	u, err := url.Parse(urlstr)
	if err != nil {
		log.Errorf("converting url: %v", err)
		return nil
	}

	// Without a scheme, url.Parse will likely put the hostname into the
	// path instead of the host. To overcome this, set an http scheme and
	// parse the string generated from the URL again.
	if u.Scheme == "" {
		u.Scheme = "http"
		normalized := u.String()
		if u, err = url.Parse(normalized); err != nil {
			// url.Parse returns a nil URL on failure, so log the string
			// that was parsed together with the error instead of u.
			log.Errorf("Error with URL %q: %v", normalized, err)
			return nil
		}
	}

	if existing, ok := sites[*u]; ok {
		return existing
	}
	if s = NewSite(u); s != nil {
		sites[*u] = s
	}
	return s
}
// scrubSites takes a slice of strings that are assumed to be URLs and
// returns a channel on which the home page of each usable site is
// delivered. The work happens in a background goroutine: every string
// is passed through scrubURL, entries for which scrubURL returns nil
// are skipped, the Site is obtained with GetSite and its home page is
// sent on the channel.
//
// The channel has a buffer of 5 and is never closed by this function,
// so receivers cannot rely on a close to detect the end of the list.
func scrubSites(slist []string) <-chan *Page {
	pages := make(chan *Page, 5)
	go func() {
		for _, raw := range slist {
			u := scrubURL(raw)
			if u == nil {
				continue
			}
			site := GetSite(u.String())
			if site == nil {
				continue
			}
			page := site.HomePage()
			if page == nil {
				continue
			}
			log.Infof("Setting up timer for page walk: %+v\n", page)
			pages <- page
		}
	}()
	return pages
}
// readSitesFile loads the list of site URL strings stored under
// "sites.json" through storage.ReadObject. When the read fails the
// error is logged and nil is returned.
func readSitesFile() []string {
	// Start from a nil slice rather than a one-element slice so that no
	// placeholder empty string can end up in the returned list.
	var sitelist []string
	if err := storage.ReadObject("sites.json", &sitelist); err != nil {
		log.Errorf("Storage failed to read sites.json: %v", err)
		return nil
	}
	return sitelist
}
// saveSitesFile writes the URL string of every entry in the
// package-level sites map to "sites.json" through storage.Save. Nothing
// is written when there are no sites. The order of the saved URLs
// follows map iteration and is therefore not stable.
//
// It returns the error from storage.Save, which is also logged.
func saveSitesFile() (err error) {
	// Length 0 with capacity for every site: a non-zero length here
	// would put an empty string at the front of the saved list.
	sitelist := make([]string, 0, len(sites))
	for _, s := range sites {
		sitelist = append(sitelist, s.URL.String())
	}
	if len(sitelist) == 0 {
		return nil
	}
	if err = storage.Save("sites.json", &sitelist); err != nil {
		log.Errorf("Storage Save failed for sites.json %v", err)
	}
	return err
}
// PageList returns the keys of s.Links as a slice of strings. Links is
// not declared on Site itself; presumably it is promoted from the
// embedded home Page, which is why HomePage is called first to make
// sure that page exists. The result is nil when there is no home page
// or no links.
func (s *Site) PageList() (plist []string) {
	if home := s.HomePage(); home == nil {
		return nil
	}
	for link := range s.Links {
		plist = append(plist, link)
	}
	return plist
}
// NewPage creates a Page for u with the package-level NewPage, points
// the page back at this site and stores it in s.Pages under u.
//
// The page becomes the site's home page only when u is the site's own
// URL; creating a page for any other URL leaves the home page untouched.
func (s *Site) NewPage(u url.URL) (p *Page) {
	p = NewPage(&u)
	p.Site = s
	// Only the page for the site's root URL is the home page. Assigning
	// s.Page unconditionally would let every child page created through
	// GetPage replace it.
	if s.URL != nil && u == *s.URL {
		s.Page = p // Home page
	}
	s.Pages[u] = p // All pages
	return p
}
// GetPage returns the page stored in s.Pages for u. When no page is
// stored for that URL yet, one is created with s.NewPage and returned.
func (s *Site) GetPage(u url.URL) (p *Page) {
	if existing, found := s.Pages[u]; found {
		return existing
	}
	return s.NewPage(u)
}
// HomePage returns the page associated with the site's root URL. If the
// site has no home page yet, one is created with s.NewPage, recorded as
// the home page and stored in s.Pages alongside the other pages.
func (s *Site) HomePage() (p *Page) {
	if s.Page != nil {
		return s.Page
	}
	root := *s.URL
	home := s.NewPage(root) // Make the home page
	s.Page = home
	s.Pages[root] = home // Save home with the rest of them
	return home
}
// String returns the page's URL rendered as a string, which lets a Page
// be printed directly by the fmt verbs that honour Stringer.
func (p *Page) String() string {
	pageURL := p.URL
	return pageURL.String()
}