// validHeaderFieldByte reports whether c is a valid tchar as defined
// by section 5.6.2 of [RFC9110].
func validHeaderFieldByte(c byte) bool {
	return c < 128 && tcharTable[c]
}

// tcharTable is indexed by ASCII byte value; an entry is true when the
// byte is a tchar:
//
//	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
//	      / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
//	      / DIGIT / ALPHA
//
// See: https://www.rfc-editor.org/rfc/rfc9110#tokens
var tcharTable = [128]bool{
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true,
	'-': true, '.': true, '^': true, '_': true, '`': true, '|': true, '~': true,
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, '8': true, '9': true,
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}

// VisitHeaderParams calls f for each parameter in the given header bytes.
// It stops processing when f returns false or an invalid parameter is found.
//
// Everything up to and including the first ';' is skipped. Each parameter has
// the form name=value, where name is a token and value is either a token or a
// quoted string; no whitespace is allowed around '='. Optional whitespace
// (spaces and horizontal tabs) is accepted after each ';'.
//
// For a quoted value the surrounding double quotes are removed before the
// value is passed to f. A backslash is treated as an escape character while
// searching for the closing quote, but escape sequences inside the value are
// passed to f unchanged.
// See: https://www.rfc-editor.org/rfc/rfc9110#section-5.6.6
//
// key and value are sub-slices of b, so f must not retain references to them
// after returning. Copy their contents before returning if they are needed
// later.
func VisitHeaderParams(b []byte, f func(key, value []byte) bool) {
	for len(b) > 0 {
		// Advance to the next ';'. Anything in front of it (the leading
		// header value, or trailing bytes after the previous parameter)
		// is ignored.
		idxSemi := 0
		for idxSemi < len(b) && b[idxSemi] != ';' {
			idxSemi++
		}
		if idxSemi >= len(b) {
			return
		}
		b = b[idxSemi+1:]

		// Skip optional whitespace: OWS = *( SP / HTAB ).
		for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
			b = b[1:]
		}

		// The parameter name must be a non-empty token.
		n := 0
		if len(b) == 0 || !validHeaderFieldByte(b[n]) {
			return
		}
		n++
		for n < len(b) && validHeaderFieldByte(b[n]) {
			n++
		}

		// The name must be followed by '=' and at least one more byte,
		// which makes the b[n] accesses below safe.
		if n >= len(b)-1 || b[n] != '=' {
			return
		}
		param := b[:n]
		n++

		switch {
		case validHeaderFieldByte(b[n]):
			// Token value: runs until the first non-tchar byte.
			m := n
			n++
			for n < len(b) && validHeaderFieldByte(b[n]) {
				n++
			}
			if !f(param, b[m:n]) {
				return
			}
		case b[n] == '"':
			// Quoted-string value: runs until the first unescaped '"'.
			foundEndQuote := false
			escaping := false
			n++
			m := n
			for ; n < len(b); n++ {
				if b[n] == '"' && !escaping {
					foundEndQuote = true
					break
				}
				// A backslash escapes the next byte only, so "\\" does
				// not leave the scanner in the escaping state.
				escaping = (b[n] == '\\' && !escaping)
			}
			if !foundEndQuote {
				return
			}
			if !f(param, b[m:n]) {
				return
			}
			// Step over the closing quote.
			n++
		default:
			return
		}
		b = b[n:]
	}
}
func (h *RequestHeader) MultipartFormBoundary() []byte { diff --git a/header_test.go b/header_test.go index 2893751f09..eaa9a50a12 100644 --- a/header_test.go +++ b/header_test.go @@ -1061,6 +1061,75 @@ func testRequestHeaderHasAcceptEncoding(t *testing.T, ae, v string, resultExpect } } +func TestVisitHeaderParams(t *testing.T) { + t.Parallel() + testVisitHeaderParams(t, "text/plain;charset=utf-8;q=0.39", [][2]string{{"charset", "utf-8"}, {"q", "0.39"}}) + testVisitHeaderParams(t, "text/plain; foo=bar ;", [][2]string{{"foo", "bar"}}) + testVisitHeaderParams(t, `text/plain; foo="bar"; `, [][2]string{{"foo", "bar"}}) + testVisitHeaderParams(t, `text/plain; foo="text/plain,text/html;charset=\"utf-8\""`, [][2]string{{"foo", `text/plain,text/html;charset=\"utf-8\"`}}) + testVisitHeaderParams(t, "text/plain foo=bar", [][2]string{}) + testVisitHeaderParams(t, "text/plain;", [][2]string{}) + testVisitHeaderParams(t, "text/plain; ", [][2]string{}) + testVisitHeaderParams(t, "text/plain; foo", [][2]string{}) + testVisitHeaderParams(t, "text/plain; foo=", [][2]string{}) + testVisitHeaderParams(t, "text/plain; =bar", [][2]string{}) + testVisitHeaderParams(t, "text/plain; foo = bar", [][2]string{}) + testVisitHeaderParams(t, `text/plain; foo="bar`, [][2]string{}) + testVisitHeaderParams(t, "text/plain;;foo=bar", [][2]string{}) + + parsed := make([][2]string, 0) + VisitHeaderParams([]byte(`text/plain; foo=bar; charset=utf-8`), func(key, value []byte) bool { + parsed = append(parsed, [2]string{string(key), string(value)}) + return !bytes.Equal(key, []byte("foo")) + }) + + if len(parsed) != 1 { + t.Fatalf("expected 1 HTTP parameter, parsed %v", len(parsed)) + } + + if parsed[0] != [2]string{"foo", "bar"} { + t.Fatalf("unexpected parameter %v=%v. 
Expecting foo=bar", parsed[0][0], parsed[0][1]) + } +} + +func testVisitHeaderParams(t *testing.T, header string, expectedParams [][2]string) { + parsed := make([][2]string, 0) + VisitHeaderParams([]byte(header), func(key, value []byte) bool { + parsed = append(parsed, [2]string{string(key), string(value)}) + return true + }) + + if len(parsed) != len(expectedParams) { + t.Fatalf("expected %v HTTP parameters, parsed %v", len(expectedParams), len(parsed)) + } + + for i := range expectedParams { + if expectedParams[i] != parsed[i] { + t.Fatalf("unexpected parameter %v=%v. Expecting %v=%v", parsed[i][0], parsed[i][1], expectedParams[i][0], expectedParams[i][1]) + } + } +} + +func FuzzVisitHeaderParams(f *testing.F) { + inputs := []string{ + `application/json; v=1; foo=bar; q=0.938; param=param; param="big fox"; q=0.43`, + `*/*`, + `\\`, + `text/plain; foo="\\\"\'\\''\'"`, + } + for _, input := range inputs { + f.Add([]byte(input)) + } + f.Fuzz(func(t *testing.T, header []byte) { + VisitHeaderParams(header, func(key, value []byte) bool { + if len(key) == 0 { + t.Errorf("Unexpected length zero parameter, failed input was: %s", header) + } + return true + }) + }) +} + func TestRequestMultipartFormBoundary(t *testing.T) { t.Parallel() diff --git a/header_timing_test.go b/header_timing_test.go index 66698c46f2..90f65ae53c 100644 --- a/header_timing_test.go +++ b/header_timing_test.go @@ -180,6 +180,19 @@ func benchmarkNormalizeHeaderKey(b *testing.B, src []byte) { }) } +func BenchmarkVisitHeaderParams(b *testing.B) { + var h RequestHeader + h.SetBytesKV(strContentType, []byte(`text/plain ; foo=bar ; param2="dquote is: [\"], ok?" ; version=1; q=0.324 `)) + + header := h.ContentType() + b.ReportAllocs() + b.ResetTimer() + + for n := 0; n < b.N; n++ { + VisitHeaderParams(header, func(key, value []byte) bool { return true }) + } +} + func BenchmarkRemoveNewLines(b *testing.B) { type testcase struct { value string