-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
schemadiff: validate views' referenced columns via semantics #12565
Changes from 30 commits
c4c213b
3dac907
e88d426
61de6b0
389872c
56c8421
11d6379
f4fd673
a4ae98d
ef16bdf
54fb73a
2d3e70b
b69ede6
bc34369
bc8799d
dd1bc82
7f6fd13
4939d01
89d6cba
2194291
38eab14
7eb83c1
f98729b
36075ec
26e9577
5718b58
fe90b31
db43534
3bf836f
95a963e
ebf8a12
1244c0b
8b07d21
017e10d
eac2860
4daa08b
1699955
c66b6e9
1c54579
dfd1cc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -24,7 +24,11 @@ import ( | |||||
"sort" | ||||||
"strings" | ||||||
|
||||||
"vitess.io/vitess/go/vt/concurrency" | ||||||
"vitess.io/vitess/go/vt/proto/vtrpc" | ||||||
"vitess.io/vitess/go/vt/sqlparser" | ||||||
"vitess.io/vitess/go/vt/vterrors" | ||||||
"vitess.io/vitess/go/vt/vtgate/semantics" | ||||||
) | ||||||
|
||||||
// Schema represents a database schema, which may contain entities such as tables and views. | ||||||
|
@@ -318,6 +322,11 @@ func (s *Schema) normalize() error { | |||||
} | ||||||
} | ||||||
|
||||||
// Validate views' referenced columns: do these columns actually exist in referenced tables/views? | ||||||
if err := s.ValidateViewReferences(); err != nil { | ||||||
return err | ||||||
} | ||||||
|
||||||
// Validate table definitions | ||||||
for _, t := range s.tables { | ||||||
if err := t.validate(); err != nil { | ||||||
|
@@ -761,3 +770,147 @@ func (s *Schema) Apply(diffs []EntityDiff) (*Schema, error) { | |||||
} | ||||||
return dup, nil | ||||||
} | ||||||
|
||||||
func (s *Schema) ValidateViewReferences() error { | ||||||
errs := &concurrency.AllErrorRecorder{} | ||||||
schemaInformation := newDeclarativeSchemaInformation() | ||||||
|
||||||
// Remember that s.Entities() is already ordered by dependency. ie. tables first, then views | ||||||
// that only depend on those tables (or on dual), then 2nd tier views, etc. | ||||||
// Thus, the order of iteration below is valid and sufficient, to build | ||||||
for _, e := range s.Entities() { | ||||||
entityColumns, err := s.getEntityColumnNames(e.Name(), schemaInformation) | ||||||
if err != nil { | ||||||
errs.RecordError(err) | ||||||
continue | ||||||
} | ||||||
schemaInformation.addTable(e.Name()) | ||||||
for _, col := range entityColumns { | ||||||
schemaInformation.addColumn(e.Name(), col.Lowered()) | ||||||
} | ||||||
} | ||||||
|
||||||
// Add dual table with no explicit columns for dual style expressions in views. | ||||||
schemaInformation.addTable("dual") | ||||||
|
||||||
for _, view := range s.Views() { | ||||||
sel := sqlparser.CloneSelectStatement(view.CreateView.Select) // Analyze(), below, rewrites the select; we don't want to actually modify the schema | ||||||
_, err := semantics.Analyze(sel, semanticKS.Name, schemaInformation) | ||||||
extractColName := func(arg any) string { | ||||||
switch arg := arg.(type) { | ||||||
case string: | ||||||
return arg | ||||||
case *sqlparser.ColName: | ||||||
return arg.Name.String() | ||||||
default: | ||||||
return "" // unexpected | ||||||
} | ||||||
} | ||||||
formalizeErr := func(err error) error { | ||||||
if err == nil { | ||||||
return nil | ||||||
} | ||||||
semErr, ok := err.(*semantics.Error) | ||||||
if !ok { | ||||||
return err | ||||||
} | ||||||
if len(semErr.Args()) == 0 { | ||||||
return err | ||||||
} | ||||||
switch semErr.Code { | ||||||
case semantics.AmbiguousColumn: | ||||||
if colName := extractColName(semErr.Args()[0]); colName != "" { | ||||||
return &InvalidColumnReferencedInViewError{ | ||||||
View: view.Name(), | ||||||
Column: colName, | ||||||
Ambiguous: true, | ||||||
} | ||||||
} | ||||||
case semantics.ColumnNotFound: | ||||||
if colName := extractColName(semErr.Args()[0]); colName != "" { | ||||||
return &InvalidColumnReferencedInViewError{ | ||||||
View: view.Name(), | ||||||
Column: colName, | ||||||
} | ||||||
} | ||||||
} | ||||||
return err | ||||||
} | ||||||
errs.RecordError(formalizeErr(err)) | ||||||
} | ||||||
return errs.AggrError(vterrors.Aggregate) | ||||||
} | ||||||
|
||||||
// getTableColumnNames returns the names of columns in given table. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
func (s *Schema) getEntityColumnNames(entityName string, schemaInformation *declarativeSchemaInformation) ( | ||||||
columnNames []*sqlparser.IdentifierCI, | ||||||
err error, | ||||||
) { | ||||||
entity := s.Entity(entityName) | ||||||
if entity == nil { | ||||||
if strings.ToLower(entityName) == "dual" { | ||||||
// this is fine. DUAL does not exist but is allowed | ||||||
return nil, nil | ||||||
} | ||||||
return nil, &EntityNotFoundError{Name: entityName} | ||||||
} | ||||||
// The entity is either a table or a view | ||||||
switch entity := entity.(type) { | ||||||
case *CreateTableEntity: | ||||||
return s.getTableColumnNames(entity), nil | ||||||
case *CreateViewEntity: | ||||||
return s.getViewColumnNames(entity, schemaInformation) | ||||||
} | ||||||
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "unexpected entity type for %v", entityName) | ||||||
} | ||||||
|
||||||
// getTableColumnNames returns the names of columns in given table. | ||||||
func (s *Schema) getTableColumnNames(t *CreateTableEntity) (columnNames []*sqlparser.IdentifierCI) { | ||||||
for _, c := range t.TableSpec.Columns { | ||||||
columnNames = append(columnNames, &c.Name) | ||||||
} | ||||||
return columnNames | ||||||
} | ||||||
|
||||||
// getViewColumnNames returns the names of aliased columns returned by a given view. | ||||||
func (s *Schema) getViewColumnNames(v *CreateViewEntity, schemaInformation *declarativeSchemaInformation) ( | ||||||
columnNames []*sqlparser.IdentifierCI, | ||||||
err error, | ||||||
) { | ||||||
for _, node := range v.Select.GetColumns() { | ||||||
switch node := node.(type) { | ||||||
case *sqlparser.StarExpr: | ||||||
if tableName := node.TableName.Name.String(); tableName != "" { | ||||||
for _, col := range schemaInformation.Tables[tableName].Columns { | ||||||
name := sqlparser.CloneRefOfIdentifierCI(&col.Name) | ||||||
columnNames = append(columnNames, name) | ||||||
} | ||||||
} else { | ||||||
dependentNames, err := getViewDependentTableNames(v.CreateView) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI - information about used tables can be had from the semantic table that is the output of It's in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but, this is chicken and egg: I'm running the above in order to build the |
||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
// add all columns from all referenced tables and views | ||||||
for _, entityName := range dependentNames { | ||||||
if schemaInformation.Tables[entityName] != nil { // is nil for dual/DUAL | ||||||
for _, col := range schemaInformation.Tables[entityName].Columns { | ||||||
name := sqlparser.CloneRefOfIdentifierCI(&col.Name) | ||||||
columnNames = append(columnNames, name) | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
if len(columnNames) == 0 { | ||||||
return nil, &InvalidStarExprInViewError{View: v.Name()} | ||||||
} | ||||||
case *sqlparser.AliasedExpr: | ||||||
ci := sqlparser.NewIdentifierCI(node.ColumnName()) | ||||||
columnNames = append(columnNames, &ci) | ||||||
} | ||||||
} | ||||||
|
||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
return columnNames, nil | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the error message looks very fragile based on what struct fields are set.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we also create the message ahead of time, when the struct is created?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that’s actually the wrong pattern. The core is the structured error and the string format is just a detail here.
The structured error removes the issue of string being brittle because anyone wants more details doesn’t look at the string at all.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Id' like to avoid that right now in the context of this PR, because that would require us to rewrite all error handling.
Also, I'm really not sure, because we want to be able to create errors using various combination of fields. We don't have a single "constructor" ; we'd need different
New*()
function for any combination of fields.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can rewrite that to be more clear
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was able to simplify
InvalidColumnReferencedInViewError
by removing unusedTable
field and subsequent error message generation cases.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@harshit-gangal allocating the string is also wasteful since when using structured errors it is never used. So I don’t think we should do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the MySQL protocol level, error strings are transferred over the wire, so we might be generating them eventually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@harshit-gangal this is never used for MySQL level errors. But even then, we’d allocate when needed and not always so then it’s imho still an improvement.
It also improved the ergonomics imho since it’s now easier to create a structured error then.