Skip to content

Commit

Permalink
Handle aliasing of collation names
Browse files Browse the repository at this point in the history
With MySQL 8.0.30 and later, utf8mb3 is always reported as the charset
in output for `SHOW CREATE TABLE` which is what `schemadiff` uses. We
already today normalize all `charset` output to use the `utf8mb3` name
to avoid any ambiguity in what is intended.

We didn't do this for collations, though. Today, if `schemadiff` is given
schemas generated by both MySQL 8.0.30 and older versions to compare,
it reports a difference when there is none.

The change here always normalizes to use the more explicit `utf8mb3_`
names for the collation if it can be found, based on the charset aliases
configured. This ensures that comparisons between such schemas don't see
accidental or stray diffs that are not really changes.

Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>
  • Loading branch information
dbussink committed Oct 4, 2022
1 parent 49dfecb commit 970e381
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 6 deletions.
27 changes: 27 additions & 0 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,33 @@ func (env *Environment) CharsetAlias(charset string) (alias string, ok bool) {
return
}

// CollationAlias returns the internal collation name for the given collation.
// For now, this maps all `utf8` to `utf8mb3` collation names; in future versions of MySQL,
// this mapping will change, so it's important to use this helper so that
// Vitess code has a consistent mapping for the active collations environment.
//
// The second return value reports whether an alias was found. When it is false,
// the input name is returned unchanged. That happens when the collation is not
// known to this environment, when there is no version information for it, when
// it is only known under a single name, or when none of its names matches a
// configured charset alias.
func (env *Environment) CollationAlias(collation string) (string, bool) {
	col := env.LookupByName(collation)
	if col == nil {
		return collation, false
	}
	allCols, ok := globalVersionInfo[col.ID()]
	if !ok {
		return collation, false
	}
	if len(allCols.alias) == 1 {
		// Only one name is registered for this collation, so there is
		// nothing to map it to.
		return collation, false
	}

	// The charset alias table does not depend on the loop below, so fetch it
	// once instead of once per collation alias.
	charsetAliases := env.version.charsetAliases()
	for _, alias := range allCols.alias {
		for source, dest := range charsetAliases {
			// A match requires the requested name to use the aliased charset
			// prefix (e.g. `utf8_`) and the candidate name to use the target
			// charset prefix (e.g. `utf8mb3_`).
			if strings.HasPrefix(collation, source+"_") &&
				strings.HasPrefix(alias.name, dest+"_") {
				return alias.name, true
			}
		}
	}
	return collation, false
}

// DefaultConnectionCharset is the default charset that Vitess will use when negotiating a
// charset in a MySQL connection handshake. Note that in this context, a 'charset' is equivalent
// to a Collation ID, with the exception that it can only fit in 1 byte.
Expand Down
13 changes: 12 additions & 1 deletion go/vt/schemadiff/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,16 @@ func (c *CreateTableEntity) normalizeTableOptions() {
for _, opt := range c.CreateTable.TableSpec.Options {
opt.Name = strings.ToLower(opt.Name)
switch opt.Name {
case "charset", "collate":
case "charset":
opt.String = strings.ToLower(opt.String)
if charset, ok := collationEnv.CharsetAlias(opt.String); ok {
opt.String = charset
}
case "collate":
opt.String = strings.ToLower(opt.String)
if collation, ok := collationEnv.CollationAlias(opt.String); ok {
opt.String = collation
}
case "engine":
opt.String = strings.ToUpper(opt.String)
if engineName, ok := engineCasing[opt.String]; ok {
Expand Down Expand Up @@ -414,6 +419,12 @@ func (c *CreateTableEntity) normalizeColumnOptions() {
col.Type.Charset.Name = charset
}

// Map any collation aliases to the real collation. This applies mainly right
// now to utf8 being an alias for utf8mb3 collations.
if collation, ok := collationEnv.CollationAlias(col.Type.Options.Collate); ok {
col.Type.Options.Collate = collation
}

// Remove any lengths for integral types since it is deprecated there and
// doesn't mean anything anymore.
if _, ok := integralTypes[col.Type.Type]; ok {
Expand Down
10 changes: 5 additions & 5 deletions go/vt/schemadiff/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,8 @@ func TestCreateTableDiff(t *testing.T) {
name: "normalized COLLATE value",
from: "create table t1 (id int primary key) engine=innodb",
to: "create table t1 (id int primary key) engine=innodb, collate=UTF8_BIN",
diff: "alter table t1 collate utf8_bin",
cdiff: "ALTER TABLE `t1` COLLATE utf8_bin",
diff: "alter table t1 collate utf8mb3_bin",
cdiff: "ALTER TABLE `t1` COLLATE utf8mb3_bin",
},
}
standardHints := DiffHints{}
Expand Down Expand Up @@ -1616,17 +1616,17 @@ func TestNormalize(t *testing.T) {
{
name: "maps utf8 to utf8mb3",
from: "create table t (id int signed primary key, v varchar(255) charset utf8 collate utf8_general_ci) charset utf8 collate utf8_general_ci",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8_general_ci",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8mb3_general_ci",
},
{
name: "lowercase table options for charset and collation",
from: "create table t (id int signed primary key, v varchar(255) charset utf8 collate utf8_general_ci) charset UTF8 collate UTF8_GENERAL_CI",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8_general_ci",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8mb3_general_ci",
},
{
name: "drops existing collation if it matches table default at column level for non default charset",
from: "create table t (id int signed primary key, v varchar(255) charset utf8mb3 collate utf8_unicode_ci) charset utf8mb3 collate utf8_unicode_ci",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8_unicode_ci",
to: "CREATE TABLE `t` (\n\t`id` int PRIMARY KEY,\n\t`v` varchar(255)\n) CHARSET utf8mb3,\n COLLATE utf8mb3_unicode_ci",
},
{
name: "correct case table options for engine",
Expand Down

0 comments on commit 970e381

Please sign in to comment.