Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hotfix: improve column lineage handling and table name extraction #434

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions pkg/pipeline/lineage.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,19 @@ func (p *LineageExtractor) processLineageColumns(foundPipeline *Pipeline, asset
if upstream.Table == asset.Name {
continue
}
upstreamAsset := foundPipeline.GetAssetByName(upstream.Table)
if upstreamAsset == nil {

tableSpec := strings.Split(upstream.Table, ".")

upstreamAsset := foundPipeline.GetAssetByName(tableSpec[len(tableSpec)-1])
if upstreamAsset == nil && upstream.Table != "" {
if err := p.addColumnToAsset(asset, lineageCol.Name, nil, &Column{
Name: upstream.Column,
Type: lineageCol.Type,
Checks: []ColumnCheck{},
Upstreams: []*UpstreamColumn{
{
Column: upstream.Column,
Table: strings.ToLower(upstream.Table),
Table: strings.ToLower(tableSpec[len(tableSpec)-1]),
},
},
}); err != nil {
Expand Down Expand Up @@ -232,10 +235,14 @@ func (p *LineageExtractor) processLineageColumns(foundPipeline *Pipeline, asset

// addColumnToAsset adds a new column to the asset based on upstream information.
func (p *LineageExtractor) addColumnToAsset(asset *Asset, colName string, upstreamAsset *Asset, upstreamCol *Column) error {
if asset == nil || upstreamCol == nil || colName == "" {
if asset == nil || colName == "" {
return errors.New("invalid arguments: all parameters must be non-nil and colName must not be empty")
}

if upstreamAsset == nil {
asset.Columns = append(asset.Columns, *upstreamCol)
return nil
}
if colName == "*" {
return nil
}
Expand Down Expand Up @@ -293,8 +300,7 @@ func (p *LineageExtractor) addColumnToAsset(asset *Asset, colName string, upstre
// upstreamExists checks if a given upstream already exists in the list.
func upstreamExists(upstreams []*UpstreamColumn, newUpstream UpstreamColumn) bool {
for _, existingUpstream := range upstreams {
if strings.EqualFold(existingUpstream.Column, newUpstream.Column) &&
strings.EqualFold(existingUpstream.Table, newUpstream.Table) {
if strings.EqualFold(existingUpstream.Column, newUpstream.Column) {
return true
}
}
Expand Down
2 changes: 0 additions & 2 deletions pythonsrc/parser/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
from dataclasses import dataclass

from sqlglot import parse_one, exp, lineage
from sqlglot.lineage import Node
from sqlglot.optimizer import optimize
Expand Down Expand Up @@ -171,7 +170,6 @@ def get_column_lineage(query: str, schema: dict, dialect: str):
{"column": column.name, "table": column.table}
)
non_selected_columns = list(non_selected_columns_dict.values())

return {
"columns": result,
"non_selected_columns": non_selected_columns,
Expand Down