From 07670dddca6f3adf6b451ab27ee3781e82e4bebd Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Fri, 16 Jun 2023 15:37:40 -0400 Subject: [PATCH] do not share the original input string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the original input string to be garbage collected, so it should not change the memory footprint. This is a slower than the version that shares a string, but only a small amount. It is still faster than binary parsing (until that is optimized). benchstat difference of original versus this version: │ orig.txt │ new-do-not-share-string.txt │ │ sec/op │ sec/op vs base │ HstoreScan/databasesql.Scan-10 82.11µ ± 1% 14.24µ ± 2% -82.66% (p=0.000 n=10) HstoreScan/text-10 83.30µ ± 1% 14.97µ ± 1% -82.03% (p=0.000 n=10) HstoreScan/binary-10 15.99µ ± 2% 15.80µ ± 0% -1.16% (p=0.024 n=10) geomean 47.82µ 14.99µ -68.66% │ orig.txt │ new-do-not-share-string.txt │ │ B/op │ B/op vs base │ HstoreScan/databasesql.Scan-10 56.23Ki ± 0% 20.11Ki ± 0% -64.24% (p=0.000 n=10) HstoreScan/text-10 65.12Ki ± 0% 29.00Ki ± 0% -55.47% (p=0.000 n=10) HstoreScan/binary-10 21.09Ki ± 0% 21.09Ki ± 0% ~ (p=0.722 n=10) geomean 42.58Ki 23.08Ki -45.80% │ orig.txt │ new-do-not-share-string.txt │ │ allocs/op │ allocs/op vs base │ HstoreScan/databasesql.Scan-10 744.0 ± 0% 340.0 ± 0% -54.30% (p=0.000 n=10) HstoreScan/text-10 743.0 ± 0% 339.0 ± 0% -54.37% (p=0.000 n=10) HstoreScan/binary-10 464.0 ± 0% 464.0 ± 0% ~ (p=1.000 n=10) ¹ geomean 635.4 376.8 -40.70% ¹ all samples are equal benchstat difference of the shared string versus not: │ new-share-string.txt │ new-do-not-share-string.txt │ │ sec/op │ sec/op vs base │ HstoreScan/databasesql.Scan-10 10.57µ ± 2% 14.24µ ± 2% +34.69% (p=0.000 n=10) HstoreScan/text-10 11.60µ ± 2% 14.97µ ± 1% +29.03% (p=0.000 n=10) HstoreScan/binary-10 15.87µ ± 2% 15.80µ ± 0% ~ (p=0.280 n=10) geomean 12.48µ 14.99µ +20.07% │ new-share-string.txt │ new-do-not-share-string.txt │ │ B/op │ B/op vs base │ HstoreScan/databasesql.Scan-10 11.68Ki ± 0% 20.11Ki ± 0% +72.17% (p=0.000 n=10) HstoreScan/text-10 20.58Ki ± 0% 29.00Ki ± 0% +40.93% (p=0.000 n=10) HstoreScan/binary-10 21.08Ki ± 0% 21.09Ki ± 0% ~ (p=0.427 n=10) geomean 17.17Ki 23.08Ki +34.39% │ new-share-string.txt │ new-do-not-share-string.txt │ │ allocs/op │ allocs/op vs base │ HstoreScan/databasesql.Scan-10 44.00 ± 0% 340.00 ± 0% +672.73% (p=0.000 n=10) HstoreScan/text-10 44.00 ± 0% 339.00 ± 0% +670.45% (p=0.000 n=10) HstoreScan/binary-10 464.0 ± 0% 464.0 ± 0% ~ (p=1.000 n=10) ¹ geomean 96.49 376.8 +290.47% --- pgtype/hstore.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pgtype/hstore.go b/pgtype/hstore.go index a8559d8a6..c6f835443 100644 --- a/pgtype/hstore.go +++ b/pgtype/hstore.go @@ -332,7 +332,7 @@ func (p *hstoreParser) consumeExpected2(one byte, two byte) error { var errEOSInQuoted = errors.New(`found end before closing double-quote ('"')`) // consumeDoubleQuoted consumes a double-quoted string from p. The double quote must have been -// parsed already. +// parsed already. This copies the string from the backing string so it can be garbage collected. func (p *hstoreParser) consumeDoubleQuoted() (string, error) { // fast path: assume most keys/values do not contain escapes nextDoubleQuote := strings.IndexByte(p.str[p.pos:], '"') @@ -341,8 +341,9 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) { } nextDoubleQuote += p.pos if p.nextBackslash == -1 || p.nextBackslash > nextDoubleQuote { - // no escapes in this string - s := p.str[p.pos:nextDoubleQuote] + // clone the string from the source string to ensure it can be garbage collected separately + // TODO: use strings.Clone on Go 1.20; this could get optimized away + s := strings.Clone(p.str[p.pos:nextDoubleQuote]) p.pos = nextDoubleQuote + 1 return s, nil } @@ -357,7 +358,8 @@ func (p *hstoreParser) consumeDoubleQuoted() (string, error) { } // consumeDoubleQuotedWithEscapes consumes a double-quoted string containing escapes, starting -// at p.pos, and with the first backslash at firstBackslash. +// at p.pos, and with the first backslash at firstBackslash. This copies the string so it can be +// garbage collected separately. func (p *hstoreParser) consumeDoubleQuotedWithEscapes(firstBackslash int) (string, error) { // copy the prefix that does not contain backslashes var builder strings.Builder