Skip to content

Commit

Permalink
Implement streaming injections (#212)
Browse files Browse the repository at this point in the history
* Improve/fix hop-by-hop header management in CONNECT proxying; proactively close connections when clients or servers transmit the intention to do so themselves

* Rewrite htmlrewrite to enable streaming injection into the response without having to first read the entire body

* Implement htmlrewrite.PrependHeadContents for use in scriptlet injector

* Rewrite htmlrewrite tests to be table-driven tests; replace crypto/rand with math/rand in genAlphanumByteArray
  • Loading branch information
anfragment authored Jan 30, 2025
1 parent 256a517 commit 06118ef
Show file tree
Hide file tree
Showing 12 changed files with 371 additions and 254 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/andybalholm/brotli v1.1.0
github.com/blang/semver v3.5.1+incompatible
github.com/getlantern/elevate v0.0.0-20220903142053-479ab992b264
github.com/hashicorp/go-multierror v1.1.1
github.com/hectane/go-acl v0.0.0-20230122075934-ca0b05cb1adb
github.com/klauspost/compress v1.17.9
github.com/wailsapp/wails/v2 v2.9.2
Expand All @@ -34,6 +35,7 @@ require (
github.com/go-stack/stack v1.8.1 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/jchv/go-winloader v0.0.0-20210711035445-715c2860da7e // indirect
github.com/labstack/echo/v4 v4.11.1 // indirect
github.com/labstack/gommon v0.4.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hectane/go-acl v0.0.0-20230122075934-ca0b05cb1adb h1:PGufWXXDq9yaev6xX1YQauaO1MV90e6Mpoq1I7Lz/VM=
github.com/hectane/go-acl v0.0.0-20230122075934-ca0b05cb1adb/go.mod h1:QiyDdbZLaJ/mZP4Zwc9g2QsfaEA4o7XvvgZegSci5/E=
github.com/jchv/go-winloader v0.0.0-20210711035445-715c2860da7e h1:Q3+PugElBCf4PFpxhErSzU3/PY5sFL5Z6rfv4AbGAck=
Expand Down
9 changes: 6 additions & 3 deletions internal/cosmetic/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ func (inj *Injector) Inject(req *http.Request, res *http.Response) error {
ruleInjection.WriteString(css)
ruleInjection.Write(injectionEnd)

htmlrewrite.ReplaceHeadContents(res, func(match []byte) []byte {
return bytes.Join([][]byte{match, ruleInjection.Bytes()}, nil)
})
// Why append and not prepend?
// When multiple CSS rules define an !important property, conflicts are resolved first by specificity and then by the order of the CSS declarations.
// Appending ensures our rules take precedence.
if err := htmlrewrite.AppendHeadContents(res, ruleInjection.Bytes()); err != nil {
return fmt.Errorf("append head contents: %w", err)
}

return nil
}
Expand Down
9 changes: 6 additions & 3 deletions internal/cssrule/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ func (inj *Injector) Inject(req *http.Request, res *http.Response) error {
ruleInjection.WriteString(strings.Join(cssRules, ""))
ruleInjection.Write(injectionEnd)

htmlrewrite.ReplaceHeadContents(res, func(match []byte) []byte {
return bytes.Join([][]byte{match, ruleInjection.Bytes()}, nil)
})
// Why append and not prepend?
// When multiple CSS rules define an !important property, conflicts are resolved first by specificity and then by the order of the CSS declarations.
// Appending ensures our rules take precedence.
if err := htmlrewrite.AppendHeadContents(res, ruleInjection.Bytes()); err != nil {
return fmt.Errorf("prepend head contents: %w", err)
}

return nil
}
3 changes: 1 addition & 2 deletions internal/filter/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ func (f *Filter) HandleRequest(req *http.Request) *http.Response {
func (f *Filter) HandleResponse(req *http.Request, res *http.Response) error {
if isDocumentNavigation(req, res) {
if err := f.scriptletsInjector.Inject(req, res); err != nil {
// The error is recoverable, so we log it and continue processing the response.
// This and the following injection errors are recoverable, so we log them and continue processing the response.
log.Printf("error injecting scriptlets for %q: %v", logger.Redacted(req.URL), err)
}

Expand All @@ -290,7 +290,6 @@ func (f *Filter) HandleResponse(req *http.Request, res *http.Response) error {
log.Printf("error injecting css rules for %q: %v", logger.Redacted(req.URL), err)
}
if err := f.jsRuleInjector.Inject(req, res); err != nil {
// The error is recoverable, so we log it and continue processing the response.
log.Printf("error injecting js rules for %q: %v", logger.Redacted(req.URL), err)
}
}
Expand Down
166 changes: 113 additions & 53 deletions internal/htmlrewrite/htmlrewrite.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,84 +5,144 @@ import (
"fmt"
"io"
"net/http"
"regexp"
"strconv"
)

var (
bodyRegex = regexp.MustCompile(`(?is)<body[^>]*>.*</body>`)
bodyStartRegex = regexp.MustCompile(`(?i)<body[^>]*>`)
bodyEndTagLen = len("</body>")

headRegex = regexp.MustCompile(`(?is)<head[^>]*>.*</head>`)
headStartRegex = regexp.MustCompile(`(?i)<head[^>]*>`)
headEndTagLen = len("</head>")
"golang.org/x/net/html"
)

// ReplaceHeadContents allows to replace the contents of the <body> tag in an HTTP response.
// The repl function is called with the contents of the <body> tag and should return the new contents.
// PrependHeadContents prepends prependWith to the contents of the <head> tag in an HTTP text/html response.
//
// On error, the response body is unchanged and the caller may proceed as if the function had not been called.
func ReplaceBodyContents(res *http.Response, repl func(match []byte) []byte) error {
rawHTTPBodyBytes, err := readRawBody(res)
func PrependHeadContents(res *http.Response, prependWith []byte) error {
rawBodyReader, err := getRawBodyReader(res)
if err != nil {
return fmt.Errorf("read raw body: %w", err)
return fmt.Errorf("get raw body reader: %w", err)
}

modifiedBody := bodyRegex.ReplaceAllFunc(rawHTTPBodyBytes, func(match []byte) []byte {
startTagMatches := bodyStartRegex.FindIndex(match)
if startTagMatches == nil {
// This check is probably redundant, but let's keep it to avoid a panic in production.
return nil
}
endTagStart := len(match) - bodyEndTagLen
reader, writer := io.Pipe()

res := make([]byte, 0, len(match))
res = append(res, match[:startTagMatches[1]]...)
res = append(res, repl(match[startTagMatches[1]:endTagStart:endTagStart])...)
res = append(res, match[endTagStart:]...)
return res
})
go func() {
defer rawBodyReader.Close()

setBody(res, modifiedBody)
z := html.NewTokenizer(rawBodyReader)

outer:
for {
switch token := z.Next(); token {
case html.ErrorToken:
writer.CloseWithError(z.Err())
break outer
case html.StartTagToken:
writer.Write(z.Raw())
if name, _ := z.TagName(); bytes.Equal(name, []byte("head")) {
writer.Write(prependWith)
writer.Write(z.Buffered())
// Directly copy the remaining content, without the overhead of tokenization.
_, err := io.Copy(writer, rawBodyReader)
writer.CloseWithError(err)
break outer
}
default:
writer.Write(z.Raw())
}
}
}()

setBody(res, reader)
return nil
}

// ReplaceHeadContents allows to replace the contents of the <head> tag in an HTTP response.
// The repl function is called with the contents of the <head> tag and should return the new contents.
// PrependBodyContents prepends prependWith to the contents of the <body> tag in an HTTP text/html response.
//
// On error, the response body is unchanged and the caller may proceed as if the function had not been called.
func ReplaceHeadContents(res *http.Response, repl func(match []byte) []byte) error {
rawHTTPBodyBytes, err := readRawBody(res)
func PrependBodyContents(res *http.Response, prependWith []byte) error {
rawBodyReader, err := getRawBodyReader(res)
if err != nil {
return fmt.Errorf("read raw body: %w", err)
return fmt.Errorf("get raw body reader: %w", err)
}

modifiedBody := headRegex.ReplaceAllFunc(rawHTTPBodyBytes, func(match []byte) []byte {
startTagMatches := headStartRegex.FindIndex(match)
if startTagMatches == nil {
// This check is probably redundant, but let's keep it to avoid a panic in production.
return nil
reader, writer := io.Pipe()

go func() {
defer rawBodyReader.Close()

z := html.NewTokenizer(rawBodyReader)

outer:
for {
switch token := z.Next(); token {
case html.ErrorToken:
writer.CloseWithError(z.Err())
break outer
case html.StartTagToken:
writer.Write(z.Raw())
if name, _ := z.TagName(); bytes.Equal(name, []byte("body")) {
writer.Write(prependWith)
writer.Write(z.Buffered())
// Directly copy the remaining content, without the overhead of tokenization.
_, err := io.Copy(writer, rawBodyReader)
writer.CloseWithError(err)
break outer
}
default:
writer.Write(z.Raw())
}
}
endTagStart := len(match) - headEndTagLen
}()

res := make([]byte, 0, len(match))
res = append(res, match[:startTagMatches[1]]...)
res = append(res, repl(match[startTagMatches[1]:endTagStart:endTagStart])...)
res = append(res, match[endTagStart:]...)
return res
})
setBody(res, reader)
return nil
}

setBody(res, modifiedBody)
// AppendHeadContents appends appendWith to the contents of the <head> tag in an HTTP text/html response,
// i.e. it inserts appendWith immediately before the first </head> end tag found in the body.
//
// The body is rewritten as a stream: a goroutine tokenizes the raw body and writes the result into a pipe
// whose read end becomes the new response body. If no </head> end tag is encountered, the body is passed
// through without the injection.
//
// On error, the response body is unchanged and the caller may proceed as if the function had not been called.
// Errors that occur later, while the body is being streamed, are reported to whoever reads the new response body.
func AppendHeadContents(res *http.Response, appendWith []byte) error {
	rawBodyReader, err := getRawBodyReader(res)
	if err != nil {
		return fmt.Errorf("get raw body reader: %w", err)
	}

	reader, writer := io.Pipe()

	go func() {
		defer rawBodyReader.Close()

		z := html.NewTokenizer(rawBodyReader)

		for {
			switch z.Next() {
			case html.ErrorToken:
				// z.Err() is io.EOF at the regular end of input; the pipe reader then observes a normal EOF.
				writer.CloseWithError(z.Err())
				return
			case html.EndTagToken:
				if name, _ := z.TagName(); bytes.Equal(name, []byte("head")) {
					// Emit the injection, then the </head> tag itself, then whatever the tokenizer
					// has already read from the underlying reader but not yet tokenized.
					for _, chunk := range [][]byte{appendWith, z.Raw(), z.Buffered()} {
						if _, err := writer.Write(chunk); err != nil {
							writer.CloseWithError(err)
							return
						}
					}
					// Directly copy the remaining content, without the overhead of tokenization.
					_, err := io.Copy(writer, rawBodyReader)
					writer.CloseWithError(err)
					return
				}
			}

			// Pass every other token through untouched. A failed write means the read end of the pipe
			// has been closed, so stop instead of tokenizing the rest of the upstream body for nothing.
			if _, err := writer.Write(z.Raw()); err != nil {
				writer.CloseWithError(err)
				return
			}
		}
	}()

	setBody(res, reader)
	return nil
}

func setBody(res *http.Response, body []byte) {
res.Body = io.NopCloser(bytes.NewReader(body))
res.ContentLength = int64(len(body))
res.Header.Set("Content-Length", strconv.Itoa(len(body)))
func setBody(res *http.Response, body io.ReadCloser) {
res.Body = body
// The resulting Content-Length cannot be determined after modifications.
// Transmit the response as chunked to allow for HTTP connection reuse without having to TCP FIN terminate the connection.
res.ContentLength = -1
res.Header.Del("Content-Length")
res.Header.Del("Content-Encoding")
res.Header.Set("Content-Type", "text/html; charset=utf-8")
res.TransferEncoding = []string{"chunked"}
res.Header.Set("Content-Type", "text/html;charset=utf-8")
}
Loading

0 comments on commit 06118ef

Please sign in to comment.