From ee04d4a74d89fb1ed733ba3208fd5b182f8f050c Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Wed, 7 Jun 2023 11:58:05 -0400 Subject: [PATCH] pgtype/hstore: Avoid Postgres Mac OS X parsing bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Postgres on Mac OS X has a bug in how it parses hstore text values that causes it to misinterpret some Unicode values as spaces. This causes values sent by pgx to be misinterpreted. To avoid this, always quote hstore values, which is how Postgres serializes them itself. The test change fails on Mac OS X without this fix. While I suspect this should not be performance critical for any application, I added a quick benchmark to test the performance of the encoding. This change actually makes encoding slightly faster on my M1 Pro. The output from the benchstat program on this banchmark is: goos: darwin goarch: arm64 pkg: github.com/jackc/pgx/v5/pgtype │ orig.txt │ new-quotes.txt │ │ sec/op │ sec/op vs base │ HstoreSerialize/text-10 207.1n ± 0% 142.3n ± 1% -31.31% (p=0.000 n=10) HstoreSerialize/binary-10 100.10n ± 0% 99.64n ± 1% -0.45% (p=0.013 n=10) geomean 144.0n 119.1n -17.31% I have also attempted to fix the Postgres bug, but it will take a long time for this fix to get upstream: https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com --- pgtype/hstore.go | 25 +++++++++---------------- pgtype/hstore_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/pgtype/hstore.go b/pgtype/hstore.go index f3ae78380..ca0c3f6f7 100644 --- a/pgtype/hstore.go +++ b/pgtype/hstore.go @@ -6,7 +6,6 @@ import ( "encoding/binary" "errors" "fmt" - "strings" "unicode" "unicode/utf8" @@ -137,13 +136,20 @@ func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, e buf = append(buf, ',') } - buf = append(buf, quoteHstoreElementIfNeeded(k)...) + // unconditionally quote hstore keys/values like Postgres does + // this avoids a Mac OS X Postgres hstore parsing bug: + // https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com + buf = append(buf, '"') + buf = append(buf, quoteArrayReplacer.Replace(k)...) + buf = append(buf, '"') buf = append(buf, "=>"...) if v == nil { buf = append(buf, "NULL"...) } else { - buf = append(buf, quoteHstoreElementIfNeeded(*v)...) + buf = append(buf, '"') + buf = append(buf, quoteArrayReplacer.Replace(*v)...) + buf = append(buf, '"') } } @@ -271,19 +277,6 @@ func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) ( return hstore, nil } -func quoteHstoreElementIfNeeded(src string) string { - // Double-quote keys and values that include whitespace, commas, =s or >s. To include a double - // quote or a backslash in a key or value, escape it with a backslash. - // From: https://www.postgresql.org/docs/current/hstore.html - // whitespace appears to be defined as the isspace() C function: \t\n\v\f\r\n and space - const quoteRequiredChars = `,"\=> ` + "\t\n\v\f\r" - if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || strings.ContainsAny(src, quoteRequiredChars) { - return quoteArrayElement(src) - } - - return src -} - const ( hsPre = iota hsKey diff --git a/pgtype/hstore_test.go b/pgtype/hstore_test.go index aa6881c5c..e2b6bb4ef 100644 --- a/pgtype/hstore_test.go +++ b/pgtype/hstore_test.go @@ -129,6 +129,9 @@ func TestHstoreCodec(t *testing.T) { "form\\ffeed", "carriage\rreturn", "curly{}braces", + // Postgres on Mac OS X hstore parsing bug: + // ą = "\xc4\x85" in UTF-8; isspace(0x85) on Mac OS X returns true instead of false + "mac_bugą", } for _, s := range specialStrings { // Special key values @@ -255,3 +258,33 @@ func TestParseInvalidInputs(t *testing.T) { } } } + +func BenchmarkHstoreEncode(b *testing.B) { + stringPtr := func(s string) *string { + return &s + } + h := pgtype.Hstore{"a x": stringPtr("100"), "b": stringPtr("200"), "c": stringPtr("300"), + "d": stringPtr("400"), "e": stringPtr("500")} + + serializeConfigs := []struct { + name string + encodePlan pgtype.EncodePlan + }{ + {"text", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.TextFormatCode, h)}, + {"binary", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.BinaryFormatCode, h)}, + } + + for _, serializeConfig := range serializeConfigs { + var buf []byte + b.Run(serializeConfig.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + var err error + buf, err = serializeConfig.encodePlan.Encode(h, buf) + if err != nil { + b.Fatal(err) + } + buf = buf[:0] + } + }) + } +}