Skip to content

Commit

Permalink
Merge pull request #18 from nlnwa/doc
Browse files Browse the repository at this point in the history
Documented basic usage
  • Loading branch information
johnerikhalse authored Jun 21, 2023
2 parents bf68ebe + 6f97b42 commit 8306140
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 2 deletions.
53 changes: 51 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,53 @@
# whatwg-url
WHATWG conformant url parser for the Go language
Whatwg-url is a spec-compliant URL parser written in Go. See the [WHATWG URL Standard](https://url.spec.whatwg.org/#url-parsing)
for the specification.

This is a work in progress and the API is not stable yet
## Status
The whatwg-url parser is up to date with the WHATWG URL Standard as of [24 May 2023](https://url.spec.whatwg.org/commit-snapshots/eee49fdf4f99d59f717cbeb0bce29fda930196d4/)
and passes all relevant tests from [web-platform-tests](https://github.com/web-platform-tests/wpt/tree/master/url).

## Usage
The API is similar to [Chapter 6 of the WHATWG URL Standard](https://url.spec.whatwg.org/#api). See the [documentation](https://pkg.go.dev/github.com/nlnwa/whatwg-url) for details.

```go
import "github.com/nlnwa/whatwg-url/url"

url, _ := url.Parse("http://example.com:80/a?b#c")
fmt.Println(url.Scheme()) // http
fmt.Println(url.Host()) // example.com
fmt.Println(url.Port()) // ""
fmt.Println(url.Pathname()) // "/a"
fmt.Println(url.Href(false)) // http://example.com/a?b#c
fmt.Println(url.Href(true)) // http://example.com/a?b
fmt.Println(url.Hash()) // "#c"
fmt.Println(url.Fragment()) // "c"
fmt.Println(url.Search()) // "?b"
fmt.Println(url.Query()) // "b"
fmt.Println(url) // http://example.com/a?b#c
```

### Options
The default parser instance follows the WHATWG URL Standard. To adapt parsing to other needs, create a new parser
instance and configure it with [options](https://pkg.go.dev/github.com/nlnwa/whatwg-url/url#ParserOption).

Example:

```go
p := url.NewParser(url.WithAcceptInvalidCodepoints(), url.WithCollapseConsecutiveSlashes())
```

### Canonicalization
If you want canonicalization beyond what's described in the standard, you can use the
[Canonicalizer API](https://pkg.go.dev/github.com/nlnwa/whatwg-url/canonicalizer).
You can define your own canonicalization profile:

```go
c := canonicalizer.New(canonicalizer.WithRemoveUserInfo(), canonicalizer.WithRemoveFragment())
url, err := c.Parse("http://user@example.com/a?b#c")
```

Or use one of the predefined profiles:

```go
url, err := canonicalizer.GoogleSafeBrowsing.Parse("http://user@example.com/a?b#c")
```
39 changes: 39 additions & 0 deletions canonicalizer/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright 2021 National Library of Norway.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package canonicalizer_test

import (
"fmt"
"github.com/nlnwa/whatwg-url/canonicalizer"
)

// ExampleNew builds a custom canonicalization profile from the
// WithRemoveUserInfo and WithRemoveFragment options and prints the result of
// parsing a URL with it. Nothing is printed if parsing fails.
func ExampleNew() {
	profile := canonicalizer.New(
		canonicalizer.WithRemoveUserInfo(),
		canonicalizer.WithRemoveFragment(),
	)
	canonical, err := profile.Parse("http://user@example.com/a//d?b#c")
	if err != nil {
		return
	}
	fmt.Println(canonical)
	// Output: http://example.com/a//d?b
}

// ExampleGoogleSafeBrowsing parses a URL with the predefined
// GoogleSafeBrowsing profile and prints the canonicalized form. Nothing is
// printed if parsing fails.
func ExampleGoogleSafeBrowsing() {
	const input = "http://user@example.com/a//d?b#c"

	canonical, err := canonicalizer.GoogleSafeBrowsing.Parse(input)
	if err != nil {
		return
	}
	fmt.Println(canonical)
	// Output: http://user@example.com/a/d?b
}
9 changes: 9 additions & 0 deletions canonicalizer/profiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,21 @@ var LaxPathPercentEncodeSet = url.PathPercentEncodeSet.Clear(0x2E, 0x3C, 0x3E)
var LaxQueryPercentEncodeSet = url.QueryPercentEncodeSet.Clear(0x22, 0x25, 0x2F, 0x3B, 0x3F, 0x7B)
var RepeatedQueryPercentDecodeSet = url.C0OrSpacePercentEncodeSet.Set('#', '%', '&', '=')

// WhatWg is a profile that follows the canonicalization rules used by [WHATWG].
// It is created by calling New without any options.
//
// [WHATWG]: https://url.spec.whatwg.org/
var WhatWg = New()

// WhatWgSortQuery is a profile that follows the canonicalization rules used by [WHATWG], but sorts query parameters.
// It is created by calling New with the single option WithSortQuery(SortKeys).
//
// [WHATWG]: https://url.spec.whatwg.org/
var WhatWgSortQuery = New(
WithSortQuery(SortKeys),
)

// GoogleSafeBrowsing is a profile that follows the canonicalization rules used by [Google Safe Browsing].
//
// [Google Safe Browsing]: https://developers.google.com/safe-browsing/v4/urls-hashing#canonicalization
var GoogleSafeBrowsing = New(
url.WithLaxHostParsing(),
url.WithQueryPercentEncodeSet(LaxQueryPercentEncodeSet),
Expand Down
72 changes: 72 additions & 0 deletions url/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package url_test

import (
"fmt"
"github.com/nlnwa/whatwg-url/url"
)

// ExampleNewParser creates a parser configured with the
// WithAcceptInvalidCodepoints and WithCollapseConsecutiveSlashes options and
// prints the result of parsing a URL with it. Nothing is printed if parsing
// fails.
func ExampleNewParser() {
	parser := url.NewParser(
		url.WithAcceptInvalidCodepoints(),
		url.WithCollapseConsecutiveSlashes(),
	)
	parsed, err := parser.Parse("http://example*.com/a//d?b#c")
	if err != nil {
		return
	}
	fmt.Println(parsed)
	// Output: http://example*.com/a/d?b#c
}

// ExampleUrl_Scheme prints the scheme of a parsed URL.
func ExampleUrl_Scheme() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	scheme := parsed.Scheme()
	fmt.Println(scheme)
	// Output: http
}

// ExampleUrl_Host prints the host of a parsed URL.
func ExampleUrl_Host() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	host := parsed.Host()
	fmt.Println(host)
	// Output: example.com
}

// ExampleUrl_Port prints the port of a parsed URL. The expected output is
// empty even though the input specifies ":80".
func ExampleUrl_Port() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	port := parsed.Port()
	fmt.Println(port)
	// Output:
}

// ExampleUrl_Pathname prints the path component of a parsed URL.
func ExampleUrl_Pathname() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	pathname := parsed.Pathname()
	fmt.Println(pathname)
	// Output: /a
}

// ExampleUrl_Href prints the serialized URL twice: first with
// excludeFragment set to false (fragment kept), then with it set to true
// (fragment omitted).
func ExampleUrl_Href() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	for _, excludeFragment := range []bool{false, true} {
		fmt.Println(parsed.Href(excludeFragment))
	}
	// Output: http://example.com/a?b#c
	// http://example.com/a?b
}

// ExampleUrl_Hash prints the fragment of a parsed URL including its leading
// "#".
func ExampleUrl_Hash() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	hash := parsed.Hash()
	fmt.Println(hash)
	// Output: #c
}

// ExampleUrl_Fragment prints the fragment of a parsed URL without the leading
// "#".
func ExampleUrl_Fragment() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	fragment := parsed.Fragment()
	fmt.Println(fragment)
	// Output: c
}

// ExampleUrl_Search prints the query of a parsed URL including its leading
// "?".
func ExampleUrl_Search() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	search := parsed.Search()
	fmt.Println(search)
	// Output: ?b
}

// ExampleUrl_Query prints the query of a parsed URL without the leading "?".
func ExampleUrl_Query() {
	parsed, _ := url.Parse("http://example.com:80/a?b#c")
	query := parsed.Query()
	fmt.Println(query)
	// Output: b
}
10 changes: 10 additions & 0 deletions url/searchparams.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type NameValuePair struct {
Name, Value string
}

// SearchParams represents a set of query parameters.
type SearchParams struct {
url *Url
params []*NameValuePair
Expand Down Expand Up @@ -60,11 +61,13 @@ func (s *SearchParams) update() {
}
}

// Append adds a new name/value pair at the end of the search parameters.
// Existing pairs, including any with the same name, are kept. After the pair
// is added, update is called.
func (s *SearchParams) Append(name, value string) {
	pair := &NameValuePair{
		Name:  name,
		Value: value,
	}
	s.params = append(s.params, pair)
	s.update()
}

// Delete deletes the given search parameter, and its associated value(s), from the search parameters.
func (s *SearchParams) Delete(name string) {
var result []*NameValuePair
for _, nvp := range s.params {
Expand All @@ -76,6 +79,7 @@ func (s *SearchParams) Delete(name string) {
s.update()
}

// Get returns the first value associated with the given search parameter name.
func (s *SearchParams) Get(name string) string {
for _, nvp := range s.params {
if nvp.Name == name {
Expand All @@ -85,6 +89,7 @@ func (s *SearchParams) Get(name string) string {
return ""
}

// GetAll returns all the values associated with the given search parameter name.
func (s *SearchParams) GetAll(name string) []string {
var result []string
for _, nvp := range s.params {
Expand All @@ -95,6 +100,7 @@ func (s *SearchParams) GetAll(name string) []string {
return result
}

// Has reports whether the search parameters contain a parameter with the given name.
func (s *SearchParams) Has(name string) bool {
for _, nvp := range s.params {
if nvp.Name == name {
Expand All @@ -104,6 +110,7 @@ func (s *SearchParams) Has(name string) bool {
return false
}

// Set sets the value associated with name to value. It replaces any existing values associated with name.
func (s *SearchParams) Set(name, value string) {
isSet := false
params := s.params[:0]
Expand All @@ -126,20 +133,23 @@ func (s *SearchParams) Set(name, value string) {
s.update()
}

// Sort reorders the search parameters by ascending name, using Go's string
// comparison. The sort is stable, so parameters that share a name keep their
// relative order. After sorting, update is called.
func (s *SearchParams) Sort() {
	byName := func(a, b int) bool {
		left, right := s.params[a], s.params[b]
		return left.Name < right.Name
	}
	sort.SliceStable(s.params, byName)
	s.update()
}

// SortAbsolute sorts the search parameters by name and, for parameters with
// equal names, by value. Both comparisons use Go's string ordering. The sort
// is stable, so identical name/value pairs keep their relative order. After
// sorting, update is called.
//
// Name and value are compared as separate keys rather than as one
// concatenated string: concatenation would order ("ab", "a") before
// ("a", "z") and could not distinguish ("a", "bc") from ("ab", "c").
func (s *SearchParams) SortAbsolute() {
	sort.SliceStable(s.params, func(i, j int) bool {
		a, b := s.params[i], s.params[j]
		if a.Name != b.Name {
			return a.Name < b.Name
		}
		return a.Value < b.Value
	})
	s.update()
}

// Iterate iterates over the search parameters.
func (s *SearchParams) Iterate(f func(pair *NameValuePair)) {
for _, nvp := range s.params {
f(nvp)
Expand Down
2 changes: 2 additions & 0 deletions url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"strings"
)

// Url represents a URL.
type Url struct {
inputUrl string
scheme string
Expand All @@ -39,6 +40,7 @@ type Url struct {
}

// Href implements WHATWG url api (https://url.spec.whatwg.org/#api)
// If excludeFragment is true, the fragment component will be excluded from the output.
func (u *Url) Href(excludeFragment bool) string {
output := u.scheme + ":"
if u.host != nil {
Expand Down

0 comments on commit 8306140

Please sign in to comment.