-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathurl.go
executable file
·168 lines (135 loc) · 3.57 KB
/
url.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// Package net contains helper functions for handling
// e.g. IP addresses or domain names.
package net
import (
"bytes"
"errors"
"net"
"net/url"
"regexp"
"strings"
"github.com/PuerkitoBio/purell"
"golang.org/x/net/idna"
)
// IsURL reports whether u represents a valid URL.
//
// The input is trimmed and lower-cased first. A bare IP address, network,
// domain or FQDN is not considered a URL. Otherwise the input must survive
// schema normalisation and parsing, and the host extracted from it must be
// an IP address, a domain or an FQDN.
func IsURL(u string) bool {
	candidate := strings.ToLower(strings.TrimSpace(u))

	// Plain hosts and networks are explicitly not URLs.
	if IsIPAddr(candidate) || IsNetwork(candidate) || IsDomain(candidate) || IsFQDN(candidate) {
		return false
	}

	normalised, err := NormaliseURLSchema(candidate)
	if err != nil {
		return false
	}
	if _, err = url.Parse(normalised); err != nil {
		return false
	}
	host, err := HostFromURL(normalised)
	if err != nil {
		return false
	}
	return IsIPAddr(host) || IsDomain(host) || IsFQDN(host)
}
// HostFromURL extracts the hostname from the given URL.
//
// The URL is first passed through NormaliseURLSchema and then parsed with
// url.Parse; an error from either step is returned unchanged. Any port and
// any surrounding square brackets are removed from the result.
func HostFromURL(u string) (string, error) {
	normalised, err := NormaliseURLSchema(u)
	if err != nil {
		return "", err
	}
	parsed, err := url.Parse(normalised)
	if err != nil {
		return "", err
	}

	// Workarounds for known problems with net/url, see e.g. the table at
	// https://github.com/goware/urlx
	var host string
	switch {
	case parsed.Scheme == "" && parsed.Host == "":
		// Nothing was recognised as scheme or host: the host is in the path.
		host = parsed.Path
	case parsed.Host == "" && parsed.Path == "":
		// The host was taken for the scheme.
		host = parsed.Scheme
	default:
		// Drop the port if there is one; otherwise keep the host as parsed.
		name, _, splitErr := net.SplitHostPort(parsed.Host)
		if splitErr != nil {
			name = parsed.Host
		}
		host = name
	}

	// Remove trailing "]" and leading "[" characters.
	host = strings.TrimLeft(strings.TrimRight(host, "]"), "[")

	// Keep only what precedes the first slash.
	if slash := strings.Index(host, "/"); slash != -1 {
		host = host[:slash]
	}
	return host, nil
}
// urlSchemaPattern matches a syntactically valid URL schema as defined by
// RFC 3986, section 3.1: a letter followed by any number of letters, digits,
// "+", "-" or ".". It is compiled once instead of on every call.
var urlSchemaPattern = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9+.-]*$`)

// NormaliseURLSchema returns u with a schema added where one is missing and
// validates a schema that is already present.
//
//   - If u contains no "://" at all, "http://" is prepended.
//   - If the first "://" is preceded by a "/" or a "?", it is part of the path
//     or query (e.g. "www-2.example.com/hello/https://www.example.com") and
//     not a leading schema; u is returned unchanged.
//   - Otherwise the text in front of "://" must be a valid schema. If it is,
//     u is returned unchanged; if not, an error is returned.
func NormaliseURLSchema(u string) (string, error) {
	i := strings.Index(u, "://")
	if i == -1 {
		var r bytes.Buffer
		r.WriteString("http://")
		r.WriteString(u)
		return r.String(), nil
	}

	prefix := u[:i]

	// A "/" or "?" in front of "://" means there is no schema at the
	// beginning. Checking the prefix directly avoids comparing against the
	// -1 that strings.Index returns for "not found", which previously caused
	// validation to be skipped for every URL without a "?".
	if strings.ContainsAny(prefix, "/?") {
		return u, nil
	}

	if !urlSchemaPattern.MatchString(prefix) {
		return "", errors.New("existing schema is invalid")
	}
	return u, nil
}
// URLToUnicode returns u with its host converted to Unicode.
//
// The host is obtained with HostFromURL and converted with idna.ToUnicode;
// the first occurrence of the original host in u is then replaced by the
// converted host. An error from either step is returned with an empty string.
func URLToUnicode(u string) (string, error) {
	host, err := HostFromURL(u)
	if err != nil {
		return "", err
	}
	converted, err := idna.ToUnicode(host)
	if err != nil {
		return "", err
	}
	return strings.Replace(u, host, converted, 1), nil
}
// URLToPunycode returns u with its host converted to punycode.
//
// The host is obtained with HostFromURL and converted with idna.ToASCII;
// the first occurrence of the original host in u is then replaced by the
// converted host. An error from either step is returned with an empty string.
func URLToPunycode(u string) (string, error) {
	host, err := HostFromURL(u)
	if err != nil {
		return "", err
	}
	asciiHost, err := idna.ToASCII(host)
	if err != nil {
		return "", err
	}
	return strings.Replace(u, host, asciiHost, 1), nil
}
// normaliseFlags is the combination of purell normalisation flags that
// NormaliseURL passes to purell.NormalizeURL.
const normaliseFlags purell.NormalizationFlags = purell.FlagRemoveDefaultPort |
	purell.FlagDecodeDWORDHost |
	purell.FlagDecodeHexHost |
	purell.FlagDecodeOctalHost |
	purell.FlagRemoveUnnecessaryHostDots |
	purell.FlagRemoveDuplicateSlashes |
	purell.FlagUppercaseEscapes |
	purell.FlagDecodeUnnecessaryEscapes |
	purell.FlagEncodeNecessaryEscapes |
	purell.FlagRemoveEmptyPortSeparator |
	purell.FlagSortQuery
// NormaliseURL returns a normalised form of u (e.g. without default ports
// like :80 for HTTP or :443 for HTTPS, duplicate slashes, etc.).
//
// It returns the error "not a url" if IsURL rejects u, and the parse error if
// url.Parse fails. Otherwise the parsed URL is normalised by
// purell.NormalizeURL using normaliseFlags.
func NormaliseURL(u string) (string, error) {
	if !IsURL(u) {
		return "", errors.New("not a url")
	}
	parsed, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	return purell.NormalizeURL(parsed, normaliseFlags), nil
}