// canonicalizeContentType returns contentType in canonical form: the media
// type is lowercased, parameters are re-serialized by the mime package, and
// the charset parameter value is lowercased. Values that the mime package
// cannot parse are returned unchanged.
func canonicalizeContentType(contentType string) string {
	// Typically, clients send Content-Type in canonical form, without
	// parameters. In those cases, we'd like to avoid parsing and
	// canonicalization overhead.
	//
	// The fast path accepts only lowercase letters, digits, and '.', '+',
	// '-' around exactly one '/'. Every one of those bytes is a valid token
	// character, so the slow path would hand such a string back unchanged.
	// Anything else (uppercase, whitespace, parameters, non-ASCII bytes) is
	// delegated to the full parser.
	//
	// See https://www.rfc-editor.org/rfc/rfc2045.html#section-5.1 for a full
	// grammar.
	var slashes int
	for i := 0; i < len(contentType); i++ {
		switch c := contentType[i]; {
		case c >= 'a' && c <= 'z', c >= '0' && c <= '9':
		case c == '.' || c == '+' || c == '-':
		case c == '/':
			slashes++
		default:
			return canonicalizeContentTypeSlow(contentType)
		}
	}
	if slashes == 1 {
		return contentType
	}
	return canonicalizeContentTypeSlow(contentType)
}

// canonicalizeContentTypeSlow canonicalizes contentType by parsing it with
// mime.ParseMediaType and re-serializing it with mime.FormatMediaType. If
// parsing fails, contentType is returned unchanged.
func canonicalizeContentTypeSlow(contentType string) string {
	base, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return contentType
	}
	// According to RFC 9110 Section 8.3.2, the charset parameter value should
	// be treated as case-insensitive. mime.FormatMediaType canonicalizes
	// parameter names, but not parameter values, because the case sensitivity
	// of a parameter value depends on its semantics. Lowercase charset
	// explicitly so equivalent values compare equal.
	if charset, ok := params["charset"]; ok {
		params["charset"] = strings.ToLower(charset)
	}
	return mime.FormatMediaType(base, params)
}
@@ -329,6 +352,5 @@ func canonicalizeContentType(ct string) string { if charset, ok := params["charset"]; ok { params["charset"] = strings.ToLower(charset) } - return mime.FormatMediaType(base, params) } diff --git a/protocol_test.go b/protocol_test.go index ab1c8115..0aec35f1 100644 --- a/protocol_test.go +++ b/protocol_test.go @@ -27,8 +27,10 @@ func TestCanonicalizeContentType(t *testing.T) { arg string want string }{ + {name: "uppercase should be normalized", arg: "APPLICATION/json", want: "application/json"}, {name: "charset param should be treated as lowercase", arg: "application/json; charset=UTF-8", want: "application/json; charset=utf-8"}, {name: "non charset param should not be changed", arg: "multipart/form-data; boundary=fooBar", want: "multipart/form-data; boundary=fooBar"}, + {name: "no parameters should be normalized", arg: "APPLICATION/json; ", want: "application/json"}, } for _, tt := range tests { tt := tt @@ -38,3 +40,26 @@ func TestCanonicalizeContentType(t *testing.T) { }) } } + +func BenchmarkCanonicalizeContentType(b *testing.B) { + b.Run("simple", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json") + } + b.ReportAllocs() + }) + + b.Run("with charset", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json; charset=utf-8") + } + b.ReportAllocs() + }) + + b.Run("with other param", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = canonicalizeContentType("application/json; foo=utf-8") + } + b.ReportAllocs() + }) +}