Skip to content

Commit

Permalink
do not share the original input string
Browse files Browse the repository at this point in the history
This allows the original input string to be garbage collected, so it
should not change the memory footprint. This is a slower than the
version that shares a string, but only a small amount. It is still
faster than binary parsing (until that is optimized).

benchstat difference of original versus this version:

                               │  orig.txt   │     new-do-not-share-string.txt     │
                               │   sec/op    │   sec/op     vs base                │
HstoreScan/databasesql.Scan-10   82.11µ ± 1%   14.24µ ± 2%  -82.66% (p=0.000 n=10)
HstoreScan/text-10               83.30µ ± 1%   14.97µ ± 1%  -82.03% (p=0.000 n=10)
HstoreScan/binary-10             15.99µ ± 2%   15.80µ ± 0%   -1.16% (p=0.024 n=10)
geomean                          47.82µ        14.99µ       -68.66%

                               │   orig.txt   │     new-do-not-share-string.txt      │
                               │     B/op     │     B/op      vs base                │
HstoreScan/databasesql.Scan-10   56.23Ki ± 0%   20.11Ki ± 0%  -64.24% (p=0.000 n=10)
HstoreScan/text-10               65.12Ki ± 0%   29.00Ki ± 0%  -55.47% (p=0.000 n=10)
HstoreScan/binary-10             21.09Ki ± 0%   21.09Ki ± 0%        ~ (p=0.722 n=10)
geomean                          42.58Ki        23.08Ki       -45.80%

                               │  orig.txt  │     new-do-not-share-string.txt      │
                               │ allocs/op  │ allocs/op   vs base                  │
HstoreScan/databasesql.Scan-10   744.0 ± 0%   340.0 ± 0%  -54.30% (p=0.000 n=10)
HstoreScan/text-10               743.0 ± 0%   339.0 ± 0%  -54.37% (p=0.000 n=10)
HstoreScan/binary-10             464.0 ± 0%   464.0 ± 0%        ~ (p=1.000 n=10) ¹
geomean                          635.4        376.8       -40.70%
¹ all samples are equal

benchstat difference of the shared string versus not:

                               │ new-share-string.txt │     new-do-not-share-string.txt     │
                               │        sec/op        │   sec/op     vs base                │
HstoreScan/databasesql.Scan-10            10.57µ ± 2%   14.24µ ± 2%  +34.69% (p=0.000 n=10)
HstoreScan/text-10                        11.60µ ± 2%   14.97µ ± 1%  +29.03% (p=0.000 n=10)
HstoreScan/binary-10                      15.87µ ± 2%   15.80µ ± 0%        ~ (p=0.280 n=10)
geomean                                   12.48µ        14.99µ       +20.07%

                               │ new-share-string.txt │     new-do-not-share-string.txt      │
                               │         B/op         │     B/op      vs base                │
HstoreScan/databasesql.Scan-10           11.68Ki ± 0%   20.11Ki ± 0%  +72.17% (p=0.000 n=10)
HstoreScan/text-10                       20.58Ki ± 0%   29.00Ki ± 0%  +40.93% (p=0.000 n=10)
HstoreScan/binary-10                     21.08Ki ± 0%   21.09Ki ± 0%        ~ (p=0.427 n=10)
geomean                                  17.17Ki        23.08Ki       +34.39%

                               │ new-share-string.txt │      new-do-not-share-string.txt       │
                               │      allocs/op       │  allocs/op   vs base                   │
HstoreScan/databasesql.Scan-10             44.00 ± 0%   340.00 ± 0%  +672.73% (p=0.000 n=10)
HstoreScan/text-10                         44.00 ± 0%   339.00 ± 0%  +670.45% (p=0.000 n=10)
HstoreScan/binary-10                       464.0 ± 0%    464.0 ± 0%         ~ (p=1.000 n=10) ¹
geomean                                    96.49         376.8       +290.47%
  • Loading branch information
evanj authored and jackc committed Jun 16, 2023
1 parent d48d36d commit 07670dd
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions pgtype/hstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ func (p *hstoreParser) consumeExpected2(one byte, two byte) error {
var errEOSInQuoted = errors.New(`found end before closing double-quote ('"')`)

// consumeDoubleQuoted consumes a double-quoted string from p. The double quote must have been
// parsed already.
// parsed already. This copies the string from the backing string so it can be garbage collected.
func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
// fast path: assume most keys/values do not contain escapes
nextDoubleQuote := strings.IndexByte(p.str[p.pos:], '"')
Expand All @@ -341,8 +341,9 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
}
nextDoubleQuote += p.pos
if p.nextBackslash == -1 || p.nextBackslash > nextDoubleQuote {
// no escapes in this string
s := p.str[p.pos:nextDoubleQuote]
// clone the string from the source string to ensure it can be garbage collected separately
// TODO: use strings.Clone on Go 1.20; this could get optimized away
s := strings.Clone(p.str[p.pos:nextDoubleQuote])
p.pos = nextDoubleQuote + 1
return s, nil
}
Expand All @@ -357,7 +358,8 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) {
}

// consumeDoubleQuotedWithEscapes consumes a double-quoted string containing escapes, starting
// at p.pos, and with the first backslash at firstBackslash.
// at p.pos, and with the first backslash at firstBackslash. This copies the string so it can be
// garbage collected separately.
func (p *hstoreParser) consumeDoubleQuotedWithEscapes(firstBackslash int) (string, error) {
// copy the prefix that does not contain backslashes
var builder strings.Builder
Expand Down

0 comments on commit 07670dd

Please sign in to comment.