-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix a number of encoding issues when evaluating expressions with the evalengine #13509
Changes from 12 commits
e40a738
66fac65
4666428
d2c421f
41ab596
8da8cdf
cd02256
2aee93a
12d3cc9
e382a3f
f780e60
7afedbf
67b164f
afcebb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,7 @@ func convertFastFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, er | |
if dst == nil { | ||
dst = make([]byte, len(src)*3) | ||
} else { | ||
nDst = len(dst) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one and the other changes fix bugs when appending to an existing buffer. The additional |
||
dst = dst[:cap(dst)] | ||
} | ||
|
||
|
@@ -65,6 +66,7 @@ func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) | |
if dst == nil { | ||
dst = make([]byte, len(src)*3) | ||
} else { | ||
nDst = len(dst) | ||
dst = dst[:cap(dst)] | ||
} | ||
|
||
|
@@ -180,6 +182,7 @@ func Collapse(dst []byte, src []rune, dstCharset Charset) []byte { | |
if dst == nil { | ||
dst = make([]byte, len(src)*dstCharset.MaxWidth()) | ||
} else { | ||
nDst = len(dst) | ||
dst = dst[:cap(dst)] | ||
} | ||
for _, c := range src { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -194,7 +194,7 @@ func makeEnv(version collver) *Environment { | |
// A few interesting character set values. | ||
// See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet | ||
const ( | ||
CollationUtf8ID = 33 | ||
CollationUtf8mb3ID = 33 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As mentioned, rename for explicitness. |
||
CollationUtf8mb4ID = 255 | ||
CollationBinaryID = 63 | ||
CollationUtf8mb4BinID = 46 | ||
|
@@ -204,6 +204,16 @@ const ( | |
// Binary is the default Binary collation | ||
var Binary = ID(CollationBinaryID).Get() | ||
|
||
// SystemCollation is the default collation for the system tables | ||
// such as the information schema. This is still utf8mb3 to match | ||
// MySQL's behavior. This means that you can't use utf8mb4 in table | ||
// names or column names without running into significant issues. | ||
var SystemCollation = TypedCollation{ | ||
Collation: CollationUtf8mb3ID, | ||
Coercibility: CoerceCoercible, | ||
Repertoire: RepertoireUnicode, | ||
} | ||
|
||
// CharsetAlias returns the internal charset name for the given charset. | ||
// For now, this only maps `utf8` to `utf8mb3`; in future versions of MySQL, | ||
// this mapping will change, so it's important to use this helper so that | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
Copyright 2023 The Vitess Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package hex | ||
dbussink marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
import ( | ||
"encoding/hex" | ||
"math/bits" | ||
) | ||
|
||
// hextable maps a nibble value (0-15) to its upper-case hexadecimal digit.
const hextable = "0123456789ABCDEF"

// EncodeBytes returns the upper-case hexadecimal encoding of src, two
// output digits per input byte. An empty or nil src yields an empty slice.
//
// Review note: these helpers were moved into this package because they are
// now used in two places.
func EncodeBytes(src []byte) []byte {
	out := make([]byte, 0, len(src)*2)
	for _, b := range src {
		// High nibble first, then low nibble.
		out = append(out, hextable[b>>4], hextable[b&0x0f])
	}
	return out
}
|
||
func EncodeUint(u uint64) []byte { | ||
var a [16 + 1]byte | ||
i := len(a) | ||
shift := uint(bits.TrailingZeros(uint(16))) & 7 | ||
b := uint64(16) | ||
m := uint(16) - 1 // == 1<<shift - 1 | ||
|
||
for u >= b { | ||
i-- | ||
a[i] = hextable[uint(u)&m] | ||
u >>= shift | ||
} | ||
|
||
// u < base | ||
i-- | ||
a[i] = hextable[uint(u)] | ||
return a[i:] | ||
} | ||
|
||
// DecodeUint reinterprets the decimal digits of u as hexadecimal digits and
// returns the resulting bytes, most significant byte first. Each pair of
// decimal digits becomes one byte, so 1234 yields {0x12, 0x34} and 123
// yields {0x01, 0x23}. Zero yields a single zero byte.
func DecodeUint(u uint64) []byte {
	if u == 0 {
		return []byte{0}
	}
	// A uint64 has at most 20 decimal digits, hence at most 10 output bytes.
	// Filling a fixed buffer from the back avoids re-allocating and copying
	// the result on every iteration.
	var buf [10]byte
	i := len(buf)
	for ; u > 0; u /= 100 {
		pair := u % 100
		i--
		// Tens digit goes to the high nibble, ones digit to the low nibble.
		buf[i] = byte(pair/10<<4 | pair%10)
	}
	return buf[i:]
}
|
||
// DecodedLen reports how many bytes are needed to hold the decoded form of
// the hex digits in src. An odd number of digits rounds up, since a lone
// leading digit still occupies a full byte.
func DecodedLen(src []byte) int {
	digits := len(src)
	return digits/2 + digits%2
}
|
||
// DecodeBytes decodes the hexadecimal digits in src into dst and reports
// whether decoding succeeded. An odd number of digits is accepted and is
// treated as if it had a leading '0'.
//
// It returns false when src contains a non-hexadecimal character or when
// dst is too small to hold the decoded bytes. On failure dst may have been
// partially written.
func DecodeBytes(dst, src []byte) bool {
	if len(src)&1 == 1 {
		// Left-pad to an even number of digits without touching the
		// caller's slice.
		padded := make([]byte, 0, len(src)+1)
		padded = append(padded, '0')
		src = append(padded, src...)
	}
	// hex.Decode indexes dst without checking its length, so guard here
	// instead of letting an undersized buffer panic.
	if len(dst) < hex.DecodedLen(len(src)) {
		return false
	}
	_, err := hex.Decode(dst, src)
	return err == nil
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're now consistent with MySQL, which uppercases hex literals by default.