Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

evalengine: Coercion, LIKE, IN optimization #9286

Merged
merged 12 commits into from
Nov 25, 2021
2 changes: 1 addition & 1 deletion go/mysql/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func setCollationForConnection(c *Conn, params *ConnParams) error {
// getHandshakeCharacterSet returns the collation ID of DefaultCollation in an
// 8 bits integer which will be used to feed the handshake protocol's packet.
func getHandshakeCharacterSet() (uint8, error) {
coll := collations.Default().LookupByName(DefaultCollation)
coll := collations.Local().LookupByName(DefaultCollation)
if coll == nil {
// theoretically, this should never happen from an end user perspective
return 0, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "cannot resolve collation ID for collation: '%s'", DefaultCollation)
Expand Down
4 changes: 3 additions & 1 deletion go/mysql/collations/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,14 @@ func weightStringPadingSimple(padChar byte, dst []byte, numCodepoints int, padTo
return dst
}

const CollationBinaryID ID = 63

type Collation_binary struct{}

func (c *Collation_binary) Init() {}

func (c *Collation_binary) ID() ID {
return 63
return CollationBinaryID
}

func (c *Collation_binary) Name() string {
Expand Down
71 changes: 71 additions & 0 deletions go/mysql/collations/cached_size.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 71 additions & 77 deletions go/mysql/collations/coercion.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
)

func init() {
if unsafe.Sizeof(TypedCollationID{}) != 4 {
panic("TypedCollationID should fit in an int32")
if unsafe.Sizeof(TypedCollation{}) != 4 {
panic("TypedCollation should fit in an int32")
}
}

Expand Down Expand Up @@ -95,82 +95,66 @@ const (
RepertoireUnicode
)

// Coercion is a function that will transform either the left or right
// arguments of the function into the same character set. The `dst` argument
// Coercion is a function that will transform either the given argument
// arguments of the function into a specific character set. The `dst` argument
// will be used as the destination of the coerced argument, but it can be nil.
// The function returns the given left and right arguments: one of the arguments
// will be the same value that was passed in, while the other will be the
// same value but transcoded into a different character set, depending on which
// of the arguments is supposed to be coerced.
// If the contents of the argument that must be transcoded cannot be mapped
// to the target charset, an error will be returned.
type Coercion func(dst, left, right []byte) ([]byte, []byte, error)
type Coercion func(dst, in []byte) ([]byte, error)

// TypedCollation is the Collation of a SQL expression, including its coercibility
// and repertoire.
type TypedCollation struct {
Collation Collation
Coercibility Coercibility
Repertoire Repertoire
}

// TypedCollationID is like TypedCollation but the actual collation is stored with its
// Collation ID, so the total size of the struct is 4 bytes. This is useful for type
// processing in the AST.
type TypedCollationID struct {
Collation ID
Coercibility Coercibility
Repertoire Repertoire
}

func (env *Environment) TypedCollation(tid TypedCollationID) *TypedCollation {
return &TypedCollation{
Collation: env.LookupByID(tid.Collation),
Coercibility: tid.Coercibility,
Repertoire: tid.Repertoire,
}
func (tc TypedCollation) Valid() bool {
return tc.Collation != Unknown
}

func checkCompatibleCollations(left, right *TypedCollation) bool {
leftCS := left.Collation.Charset()
rightCS := right.Collation.Charset()
func checkCompatibleCollations(
left Collation, leftCoercibility Coercibility, leftRepertoire Repertoire,
right Collation, rightCoercibility Coercibility, rightRepertoire Repertoire,
) bool {
leftCS := left.Charset()
rightCS := right.Charset()

switch leftCS.(type) {
case charset.Charset_utf8mb4:
if left.Coercibility <= right.Coercibility {
if leftCoercibility <= rightCoercibility {
return true
}

case charset.Charset_utf32:
switch {
case left.Coercibility < right.Coercibility:
case leftCoercibility < rightCoercibility:
return true
case left.Coercibility == right.Coercibility:
case leftCoercibility == rightCoercibility:
if !charset.IsUnicode(rightCS) {
return true
}
if !left.Collation.IsBinary() {
if !left.IsBinary() {
return true
}
}

case charset.Charset_utf8, charset.Charset_ucs2, charset.Charset_utf16, charset.Charset_utf16le:
switch {
case left.Coercibility < right.Coercibility:
case leftCoercibility < rightCoercibility:
return true
case left.Coercibility == right.Coercibility:
case leftCoercibility == rightCoercibility:
if !charset.IsUnicode(rightCS) {
return true
}
}
}

if right.Repertoire == RepertoireASCII {
if rightRepertoire == RepertoireASCII {
switch {
case left.Coercibility < right.Coercibility:
case leftCoercibility < rightCoercibility:
return true
case left.Coercibility == right.Coercibility:
if left.Repertoire == RepertoireUnicode {
case leftCoercibility == rightCoercibility:
if leftRepertoire == RepertoireUnicode {
return true
}
}
Expand All @@ -179,10 +163,6 @@ func checkCompatibleCollations(left, right *TypedCollation) bool {
return false
}

func noCoercion(_, left, right []byte) ([]byte, []byte, error) {
return left, right, nil
}

// CoercionOptions is used to configure how aggressive the algorithm can be
// when merging two different collations by transcoding them.
type CoercionOptions struct {
Expand Down Expand Up @@ -222,71 +202,75 @@ type CoercionOptions struct {
//
// If the collations for both sides of the expression are not compatible, an error
// will be returned and the returned TypedCollation and Coercion will be nil.
func (env *Environment) MergeCollations(left, right *TypedCollation, opt CoercionOptions) (*TypedCollation, Coercion, error) {
leftCS := left.Collation.Charset()
rightCS := right.Collation.Charset()
func (env *Environment) MergeCollations(left, right TypedCollation, opt CoercionOptions) (TypedCollation, Coercion, Coercion, error) {
leftColl := env.LookupByID(left.Collation)
rightColl := env.LookupByID(right.Collation)
if leftColl == nil || rightColl == nil {
return TypedCollation{}, nil, nil, fmt.Errorf("unsupported TypeCollationID: %v / %v", left.Collation, right.Collation)
}
leftCS := leftColl.Charset()
rightCS := rightColl.Charset()

if leftCS.Name() == rightCS.Name() {
switch {
case left.Coercibility < right.Coercibility:
left.Repertoire |= right.Repertoire
return left, noCoercion, nil
return left, nil, nil, nil

case left.Coercibility > right.Coercibility:
right.Repertoire |= left.Repertoire
return right, noCoercion, nil
return right, nil, nil, nil

case left.Collation.ID() == right.Collation.ID():
case left.Collation == right.Collation:
left.Repertoire |= right.Repertoire
return left, noCoercion, nil
return left, nil, nil, nil
}

if left.Coercibility == CoerceExplicit {
goto cannotCoerce
}

leftCsBin := left.Collation.IsBinary()
rightCsBin := right.Collation.IsBinary()
leftCsBin := leftColl.IsBinary()
rightCsBin := rightColl.IsBinary()

switch {
case leftCsBin && rightCsBin:
left.Coercibility = CoerceNone
return left, noCoercion, nil
return left, nil, nil, nil

case leftCsBin:
return left, noCoercion, nil
return left, nil, nil, nil

case rightCsBin:
return right, noCoercion, nil
return right, nil, nil, nil
}

defaults := env.byCharset[leftCS.Name()]
defaults.Binary.Init()
return &TypedCollation{
Collation: defaults.Binary,
return TypedCollation{
Collation: defaults.Binary.ID(),
Coercibility: CoerceNone,
Repertoire: left.Repertoire | right.Repertoire,
}, noCoercion, nil
}, nil, nil, nil
}

if _, leftIsBinary := left.Collation.(*Collation_binary); leftIsBinary {
if _, leftIsBinary := leftColl.(*Collation_binary); leftIsBinary {
if left.Coercibility <= right.Coercibility {
return left, noCoercion, nil
return left, nil, nil, nil
}
return right, noCoercion, nil
return right, nil, nil, nil
}
if _, rightIsBinary := right.Collation.(*Collation_binary); rightIsBinary {
if _, rightIsBinary := rightColl.(*Collation_binary); rightIsBinary {
if left.Coercibility >= right.Coercibility {
return right, noCoercion, nil
return right, nil, nil, nil
}
return left, noCoercion, nil
return left, nil, nil, nil
}

if opt.ConvertToSuperset {
if checkCompatibleCollations(left, right) {
if checkCompatibleCollations(leftColl, left.Coercibility, left.Repertoire, rightColl, right.Coercibility, right.Repertoire) {
goto coerceToLeft
}
if checkCompatibleCollations(right, left) {
if checkCompatibleCollations(rightColl, right.Coercibility, right.Repertoire, leftColl, left.Coercibility, left.Repertoire) {
goto coerceToRight
}
}
Expand All @@ -301,18 +285,28 @@ func (env *Environment) MergeCollations(left, right *TypedCollation, opt Coercio
}

cannotCoerce:
return nil, nil, fmt.Errorf("Illegal mix of collations (%s,%s) and (%s,%s)",
left.Collation.Name(), left.Coercibility, right.Collation.Name(), right.Coercibility)
return TypedCollation{}, nil, nil, fmt.Errorf("Illegal mix of collations (%s,%s) and (%s,%s)",
leftColl.Name(), left.Coercibility, rightColl.Name(), right.Coercibility)

coerceToLeft:
return left, func(dst, left, right []byte) ([]byte, []byte, error) {
trans, err := charset.Convert(dst, leftCS, right, rightCS)
return left, trans, err
}, nil
return left, nil,
func(dst, in []byte) ([]byte, error) {
return charset.Convert(dst, leftCS, in, rightCS)
}, nil

coerceToRight:
return right, func(dst, left, right []byte) ([]byte, []byte, error) {
trans, err := charset.Convert(dst, rightCS, left, leftCS)
return trans, right, err
}, nil
return right,
func(dst, in []byte) ([]byte, error) {
return charset.Convert(dst, rightCS, in, leftCS)
}, nil, nil
}

func (env *Environment) EnsureCollate(fromID, toID ID) error {
// these two lookups should never fail
from := env.LookupByID(fromID)
to := env.LookupByID(toID)
if from.Charset().Name() != to.Charset().Name() {
return fmt.Errorf("COLLATION '%s' is not valid for CHARACTER SET '%s'", to.Name(), from.Charset().Name())
}
return nil
}
20 changes: 16 additions & 4 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"fmt"
"strings"
"sync"

"vitess.io/vitess/go/vt/servenv"
)

type colldefaults struct {
Expand Down Expand Up @@ -229,8 +231,18 @@ func makeEnv(version collver) *Environment {
return env
}

// Default is the default collation Environment for Vitess. This is set to
// the collation set and defaults available in MySQL 8.0
func Default() *Environment {
return fetchCacheEnvironment(collverMySQL80)
var defaultEnv *Environment
var defaultEnvInit sync.Once

// Local is the default collation Environment for Vitess. This depends
// on the value of the `mysql_server_version` flag passed to this Vitess process.
func Local() *Environment {
defaultEnvInit.Do(func() {
if *servenv.MySQLServerVersion == "" {
defaultEnv = fetchCacheEnvironment(collverMySQL80)
} else {
defaultEnv = NewEnvironment(*servenv.MySQLServerVersion)
}
})
return defaultEnv
}
Loading