Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
Add validation for input path in select_table_schema method
Browse files: browse the repository at this point in the history
  • Loading branch information
Sung Won Chung committed Feb 1, 2024
1 parent b1d05b2 commit c09f9cf
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
5 changes: 5 additions & 0 deletions data_diff/databases/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ def create_connection(self):
def select_table_schema(self, path: DbPath) -> str:
database, schema, table = self._normalize_table_path(path)

# If path only contains one object, raise an error
if len(path) == 1:
raise ValueError('The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table')

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.10 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.8 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.9 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

Check failure on line 171 in data_diff/databases/duckdb.py

View workflow job for this annotation

GitHub Actions / Check Python 3.11 on ubuntu-latest

The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table

info_schema_path = ["information_schema", "columns"]

if database:
Expand All @@ -179,6 +183,7 @@ def select_table_schema(self, path: DbPath) -> str:
f"WHERE table_name = '{table}' AND table_schema = '{schema}' and table_catalog = {dynamic_database_clause}"
)


def _normalize_table_path(self, path: DbPath) -> DbPath:
if len(path) == 1:
return None, self.default_schema, path[0]
Expand Down
18 changes: 10 additions & 8 deletions tests/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def setUp(self):
self.duckdb_conn = duckdb_differ.DuckDB(filepath=test_duckdb_filepath)

def tearDown(self):
# Optional: delete file after tests
os.remove(test_duckdb_filepath)

def test_normalize_table_path(self):
Expand All @@ -29,9 +28,16 @@ def test_normalize_table_path(self):
self.duckdb_conn._normalize_table_path(("test_database", "test_schema", "test_table", "extra"))

def test_select_table_schema(self):
db_path = ("test_table",)
expected_sql = "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'main' and table_catalog = current_catalog()"
self.assertEqual(self.duckdb_conn.select_table_schema(db_path), expected_sql)
with self.assertRaises(ValueError) as context:
# Try to call the select_table_schema with only one value in the tuple
db_path = ("test_table",)
self.duckdb_conn.select_table_schema(db_path)

# Check that the message in the ValueError is what you expect
self.assertTrue(
"The input path needs to have more than one object in your data diff configuration.\nExpected format: database.schema.table or schema.table"
in str(context.exception)
)

db_path = ("custom_schema", "test_table")
expected_sql = "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'custom_schema' and table_catalog = current_catalog()"
Expand All @@ -40,7 +46,3 @@ def test_select_table_schema(self):
db_path = ("custom_db", "custom_schema", "test_table")
expected_sql = "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM custom_db.information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'custom_schema' and table_catalog = 'custom_db'"
self.assertEqual(self.duckdb_conn.select_table_schema(db_path), expected_sql)


if __name__ == "__main__":
unittest.main()

0 comments on commit c09f9cf

Please sign in to comment.