From 381031282d3048a0f1fbc042a1041a871506fef7 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Thu, 26 Jan 2023 11:27:52 -0800 Subject: [PATCH 1/5] Optimize canonicalizeContentType For context, this function is run on every request within ServeHTTP, and the mime parsing and now lowercasing, is a marginal, but non trivial amount of memory allocations. The expensive bit is hitting the mime.FormatMediaType path, when we already have a canonical form. This removes the most expensive part over arguable the most common cases where there are no additional parameters on the Content-Type. ``` $ go test -v -bench '^BenchmarkCanonicalizeContentType$' -run '^$' . goos: darwin goarch: arm64 pkg: github.com/bufbuild/connect-go BenchmarkCanonicalizeContentType BenchmarkCanonicalizeContentType/simple BenchmarkCanonicalizeContentType/simple-10 7160896 157.4 ns/op 48 B/op 1 allocs/op BenchmarkCanonicalizeContentType/with_charset BenchmarkCanonicalizeContentType/with_charset-10 1780041 674.4 ns/op 424 B/op 6 allocs/op BenchmarkCanonicalizeContentType/with_other_param BenchmarkCanonicalizeContentType/with_other_param-10 2029819 592.6 ns/op 424 B/op 6 allocs/op PASS ok github.com/bufbuild/connect-go 5.129s ``` --- protocol.go | 4 ++++ protocol_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/protocol.go b/protocol.go index 5fefd790..3fd4369d 100644 --- a/protocol.go +++ b/protocol.go @@ -321,6 +321,10 @@ func canonicalizeContentType(ct string) string { return ct } + if len(params) == 0 { + return base + } + // According to RFC 9110 Section 8.3.2, the charset parameter value should be treated as case-insensitive. // mime.FormatMediaType canonicalizes parameter names, but not parameter values, // because the case sensitivity of a parameter value depends on its semantics. diff --git a/protocol_test.go b/protocol_test.go index ab1c8115..e68c6c98 100644 --- a/protocol_test.go +++ b/protocol_test.go @@ -29,6 +29,7 @@ func TestCanonicalizeContentType(t *testing.T) { }{ {name: "charset param should be treated as lowercase", arg: "application/json; charset=UTF-8", want: "application/json; charset=utf-8"}, {name: "non charset param should not be changed", arg: "multipart/form-data; boundary=fooBar", want: "multipart/form-data; boundary=fooBar"}, + {name: "no parameters should not have base normalized", arg: "APPLICATION/json; ", want: "application/json"}, } for _, tt := range tests { tt := tt @@ -38,3 +39,26 @@ func TestCanonicalizeContentType(t *testing.T) { }) } } + +func BenchmarkCanonicalizeContentType(b *testing.B) { + b.Run("simple", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json") + } + b.ReportAllocs() + }) + + b.Run("with charset", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json; charset=utf-8") + } + b.ReportAllocs() + }) + + b.Run("with other param", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json; foo=utf-8") + } + b.ReportAllocs() + }) +} From 46c8e29951635f5570b348335de1a7d43aa52d33 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Fri, 27 Jan 2023 15:03:58 -0800 Subject: [PATCH 2/5] Fix test case description --- protocol_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol_test.go b/protocol_test.go index e68c6c98..94916047 100644 --- a/protocol_test.go +++ b/protocol_test.go @@ -29,7 +29,7 @@ func TestCanonicalizeContentType(t *testing.T) { }{ {name: "charset param should be treated as lowercase", arg: "application/json; charset=UTF-8", want: "application/json; charset=utf-8"}, {name: "non charset param should not be changed", arg: "multipart/form-data; boundary=fooBar", want: "multipart/form-data; boundary=fooBar"}, - {name: "no parameters should not have base normalized", arg: "APPLICATION/json; ", want: "application/json"}, + {name: "no parameters should be normalized", arg: "APPLICATION/json; ", want: "application/json"}, } for _, tt := range tests { tt := tt From 63d1c61d8ab4cd8e39e9f5d87ac468b0c0043efc Mon Sep 17 00:00:00 2001 From: Akshay Shah Date: Fri, 27 Jan 2023 15:19:19 -0800 Subject: [PATCH 3/5] Check for canonical, param-less content-types by hand --- protocol.go | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/protocol.go b/protocol.go index 3fd4369d..2cbc6baa 100644 --- a/protocol.go +++ b/protocol.go @@ -316,15 +316,35 @@ func flushResponseWriter(w http.ResponseWriter) { } func canonicalizeContentType(ct string) string { - base, params, err := mime.ParseMediaType(ct) - if err != nil { + // Typically, clients send Content-Type in canonical form, without + // parameters. In those cases, we'd like to avoid parsing and + // canonicalization overhead. + // + // See https://www.rfc-editor.org/rfc/rfc2045.html#section-5.1 for a full + // grammar. + var slashes int + for _, r := range ct { + switch { + case r >= 'A' && r <= 'Z': + case r >= 'a' && r <= 'z': + case r == '.' || r == '+' || r == '-': + case r == '/': + slashes++ + default: + return canonicalizeContentTypeSlow(ct) + } + } + if slashes == 1 { return ct } + return canonicalizeContentTypeSlow(ct) +} - if len(params) == 0 { - return base +func canonicalizeContentTypeSlow(ct string) string { + base, params, err := mime.ParseMediaType(ct) + if err != nil { + return ct } - // According to RFC 9110 Section 8.3.2, the charset parameter value should be treated as case-insensitive. // mime.FormatMediaType canonicalizes parameter names, but not parameter values, // because the case sensitivity of a parameter value depends on its semantics. @@ -333,6 +353,5 @@ func canonicalizeContentType(ct string) string { if charset, ok := params["charset"]; ok { params["charset"] = strings.ToLower(charset) } - return mime.FormatMediaType(base, params) } From 5f409122cb554224000d04b4a481621ea3023e96 Mon Sep 17 00:00:00 2001 From: Akshay Shah Date: Fri, 27 Jan 2023 15:24:55 -0800 Subject: [PATCH 4/5] Correctly normalize uppercase --- protocol.go | 1 - protocol_test.go | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/protocol.go b/protocol.go index 2cbc6baa..8a15d814 100644 --- a/protocol.go +++ b/protocol.go @@ -325,7 +325,6 @@ func canonicalizeContentType(ct string) string { var slashes int for _, r := range ct { switch { - case r >= 'A' && r <= 'Z': case r >= 'a' && r <= 'z': case r == '.' || r == '+' || r == '-': case r == '/': diff --git a/protocol_test.go b/protocol_test.go index 94916047..0aec35f1 100644 --- a/protocol_test.go +++ b/protocol_test.go @@ -27,6 +27,7 @@ func TestCanonicalizeContentType(t *testing.T) { arg string want string }{ + {name: "uppercase should be normalized", arg: "APPLICATION/json", want: "application/json"}, {name: "charset param should be treated as lowercase", arg: "application/json; charset=UTF-8", want: "application/json; charset=utf-8"}, {name: "non charset param should not be changed", arg: "multipart/form-data; boundary=fooBar", want: "multipart/form-data; boundary=fooBar"}, {name: "no parameters should be normalized", arg: "APPLICATION/json; ", want: "application/json"}, From 9d80dfbdbaed918a356df4ff516011b4bc0d0d1b Mon Sep 17 00:00:00 2001 From: Akshay Shah Date: Fri, 27 Jan 2023 15:40:33 -0800 Subject: [PATCH 5/5] verbosity++ --- protocol.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/protocol.go b/protocol.go index 8a15d814..0f4a4ac8 100644 --- a/protocol.go +++ b/protocol.go @@ -315,7 +315,7 @@ func flushResponseWriter(w http.ResponseWriter) { } } -func canonicalizeContentType(ct string) string { +func canonicalizeContentType(contentType string) string { // Typically, clients send Content-Type in canonical form, without // parameters. In those cases, we'd like to avoid parsing and // canonicalization overhead. @@ -323,26 +323,26 @@ func canonicalizeContentType(ct string) string { // See https://www.rfc-editor.org/rfc/rfc2045.html#section-5.1 for a full // grammar. var slashes int - for _, r := range ct { + for _, r := range contentType { switch { case r >= 'a' && r <= 'z': case r == '.' || r == '+' || r == '-': case r == '/': slashes++ default: - return canonicalizeContentTypeSlow(ct) + return canonicalizeContentTypeSlow(contentType) } } if slashes == 1 { - return ct + return contentType } - return canonicalizeContentTypeSlow(ct) + return canonicalizeContentTypeSlow(contentType) } -func canonicalizeContentTypeSlow(ct string) string { - base, params, err := mime.ParseMediaType(ct) +func canonicalizeContentTypeSlow(contentType string) string { + base, params, err := mime.ParseMediaType(contentType) if err != nil { - return ct + return contentType } // According to RFC 9110 Section 8.3.2, the charset parameter value should be treated as case-insensitive. // mime.FormatMediaType canonicalizes parameter names, but not parameter values,