Skip to content

Commit

Permalink
pgtype/hstore: Avoid Postgres Mac OS X parsing bug
Browse files Browse the repository at this point in the history
Postgres on Mac OS X has a bug in how it parses hstore text values
that causes it to misinterpret some Unicode values as spaces. This
causes values sent by pgx to be misinterpreted. To avoid this, always
quote hstore values, which is how Postgres serializes them itself.
The test change fails on Mac OS X without this fix.

While I suspect this should not be performance critical for any
application, I added a quick benchmark to test the performance of the
encoding. This change actually makes encoding slightly faster on my
M1 Pro. The output from the benchstat program on this banchmark is:

goos: darwin
goarch: arm64
pkg: github.com/jackc/pgx/v5/pgtype
                          │   orig.txt   │           new-quotes.txt            │
                          │    sec/op    │   sec/op     vs base                │
HstoreSerialize/text-10      207.1n ± 0%   142.3n ± 1%  -31.31% (p=0.000 n=10)
HstoreSerialize/binary-10   100.10n ± 0%   99.64n ± 1%   -0.45% (p=0.013 n=10)
geomean                      144.0n        119.1n       -17.31%

I have also attempted to fix the Postgres bug, but it will take a
long time for this fix to get upstream:

https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
  • Loading branch information
evanj authored and jackc committed Jun 7, 2023
1 parent d9560c7 commit ee04d4a
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 16 deletions.
25 changes: 9 additions & 16 deletions pgtype/hstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"encoding/binary"
"errors"
"fmt"
"strings"
"unicode"
"unicode/utf8"

Expand Down Expand Up @@ -137,13 +136,20 @@ func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, e
buf = append(buf, ',')
}

buf = append(buf, quoteHstoreElementIfNeeded(k)...)
// unconditionally quote hstore keys/values like Postgres does
// this avoids a Mac OS X Postgres hstore parsing bug:
// https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
buf = append(buf, '"')
buf = append(buf, quoteArrayReplacer.Replace(k)...)
buf = append(buf, '"')
buf = append(buf, "=>"...)

if v == nil {
buf = append(buf, "NULL"...)
} else {
buf = append(buf, quoteHstoreElementIfNeeded(*v)...)
buf = append(buf, '"')
buf = append(buf, quoteArrayReplacer.Replace(*v)...)
buf = append(buf, '"')
}
}

Expand Down Expand Up @@ -271,19 +277,6 @@ func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (
return hstore, nil
}

func quoteHstoreElementIfNeeded(src string) string {
// Double-quote keys and values that include whitespace, commas, =s or >s. To include a double
// quote or a backslash in a key or value, escape it with a backslash.
// From: https://www.postgresql.org/docs/current/hstore.html
// whitespace appears to be defined as the isspace() C function: \t\n\v\f\r\n and space
const quoteRequiredChars = `,"\=> ` + "\t\n\v\f\r"
if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || strings.ContainsAny(src, quoteRequiredChars) {
return quoteArrayElement(src)
}

return src
}

const (
hsPre = iota
hsKey
Expand Down
33 changes: 33 additions & 0 deletions pgtype/hstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func TestHstoreCodec(t *testing.T) {
"form\\ffeed",
"carriage\rreturn",
"curly{}braces",
// Postgres on Mac OS X hstore parsing bug:
// ą = "\xc4\x85" in UTF-8; isspace(0x85) on Mac OS X returns true instead of false
"mac_bugą",
}
for _, s := range specialStrings {
// Special key values
Expand Down Expand Up @@ -255,3 +258,33 @@ func TestParseInvalidInputs(t *testing.T) {
}
}
}

func BenchmarkHstoreEncode(b *testing.B) {
stringPtr := func(s string) *string {
return &s
}
h := pgtype.Hstore{"a x": stringPtr("100"), "b": stringPtr("200"), "c": stringPtr("300"),
"d": stringPtr("400"), "e": stringPtr("500")}

serializeConfigs := []struct {
name string
encodePlan pgtype.EncodePlan
}{
{"text", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.TextFormatCode, h)},
{"binary", pgtype.HstoreCodec{}.PlanEncode(nil, 0, pgtype.BinaryFormatCode, h)},
}

for _, serializeConfig := range serializeConfigs {
var buf []byte
b.Run(serializeConfig.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
var err error
buf, err = serializeConfig.encodePlan.Encode(h, buf)
if err != nil {
b.Fatal(err)
}
buf = buf[:0]
}
})
}
}

0 comments on commit ee04d4a

Please sign in to comment.