Skip to content

Commit

Permalink
HA-368 - Fix: fides annotate dataset enters incorrect value (#5727)
Browse files Browse the repository at this point in the history
  • Loading branch information
andres-torres-marroquin authored Feb 3, 2025
1 parent da19f8b commit df9aecd
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Changes can also be flagged with a GitHub label for tracking purposes. The URL o
- Added frequency field to DataHubSchema integration config [#5716](https://github.com/ethyca/fides/pull/5716)

### Fixed
- Fixed the `fides annotate dataset` command writing an incorrect value to the `direction` field. [#5727](https://github.com/ethyca/fides/pull/5727)
- Fixed flaky BigQuery tests. [#5713](https://github.com/ethyca/fides/pull/5713)
- Fixed breadcrumb navigation issues in data catalog view [#5717](https://github.com/ethyca/fides/pull/5717)

Expand Down
1 change: 1 addition & 0 deletions noxfiles/git_nox.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def tag(session: nox.Session, action: str) -> None:
- tag(dry) = Show the tag that would be applied.
- tag(push) = Tag the current commit and push it. NOTE: This will trigger a new CI job to publish the tag.
"""
# pip3.10 install GitPython
from git.repo import Repo

repo = Repo()
Expand Down
9 changes: 6 additions & 3 deletions src/fides/core/annotate_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,16 @@ def annotate_dataset(
if include_null:
output_dataset.append(current_dataset.model_dump(mode="json"))
else:
output_dataset.append(current_dataset.model_dump(exclude_none=True))
output_dataset.append(
current_dataset.model_dump(mode="json", exclude_none=True)
)
except AnnotationAbortError:
if include_null:
output_dataset.append(current_dataset.model_dump(mode="json"))
else:
output_dataset.append(current_dataset.model_dump(exclude_none=True))
output_dataset.append(
current_dataset.model_dump(mode="json", exclude_none=True)
)
break
manifests.write_manifest(dataset_file, output_dataset, "dataset")
echo_green("Annotation process complete.")
Expand Down Expand Up @@ -200,7 +204,6 @@ def annotate_fields(
"""
Check for data_categories at the field level
"""

for field in get_all_level_fields(table.fields):
if not field.data_categories:
click.secho(
Expand Down
22 changes: 22 additions & 0 deletions tests/ctl/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,28 @@ def test_annotate(
assert result.exit_code == 0
print(result.output)

def test_regression_annotate_dataset(
    self,
    test_config_path: str,
    test_cli_runner: CliRunner,
):
    """
    Regression test: the file handled by `annotate dataset` must remain
    loadable with `yaml.safe_load`.

    Invokes `annotate dataset` on a fixture whose field references carry
    `direction` values, then re-reads the fixture with the safe loader.
    The test fails if the command exits with a non-zero code or if the
    file can no longer be parsed.
    """
    dataset_path = "tests/ctl/data/failing_direction.yml"

    result = test_cli_runner.invoke(
        cli,
        [
            "-f",
            test_config_path,
            "annotate",
            "dataset",
            dataset_path,
        ],
        input="user\n",
    )
    # If the command fails early, the fixture on disk is left untouched and
    # still parses, so the YAML check below would pass vacuously. Require a
    # clean exit first and surface the CLI output on failure.
    assert result.exit_code == 0, result.output

    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        try:
            yaml.safe_load(dataset_file)
        except yaml.YAMLError as exc:
            # ConstructorError (raised by the safe loader for tags it does
            # not know) is a YAMLError subclass; catching the base class
            # reports every kind of invalid YAML with the same message.
            raise AssertionError("The yaml file is not valid") from exc


@pytest.mark.integration
def test_audit(test_config_path: str, test_cli_runner: CliRunner) -> None:
Expand Down
249 changes: 249 additions & 0 deletions tests/ctl/data/failing_direction.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
dataset:
- fides_key: google_cloud_sql_postgres_example_test_dataset
organization_fides_key: default_organization
name: Google Cloud SQL for Postgres Example Test Dataset
description: Example of a Google Cloud SQL Postgres dataset containing a variety
of related tables like customers, products, addresses, etc.
collections:
- name: address
fields:
- name: city
data_categories:
- user.contact.address.city
- name: house
data_categories:
- user.contact.address.street
- name: id
data_categories:
- system.operations
fides_meta:
primary_key: true
- name: state
data_categories:
- user.contact.address.state
- name: street
data_categories:
- user.contact.address.street
- name: zip
data_categories:
- user.contact.address.postal_code
- name: customer
fields:
- name: address_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: address.id
direction: to
- name: created
data_categories:
- system.operations
- name: email
data_categories:
- user.contact.email
fides_meta:
identity: email
data_type: string
- name: id
data_categories:
- user.unique_id
fides_meta:
primary_key: true
- name: name
data_categories:
- user.name
- name: employee
fields:
- name: address_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: address.id
direction: to
- name: email
data_categories:
- user.contact.email
fides_meta:
identity: email
data_type: string
- name: id
data_categories:
- user.unique_id
fides_meta:
primary_key: true
- name: name
data_categories:
- user.name
- name: login
fields:
- name: customer_id
data_categories:
- user.unique_id
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories:
- system.operations
- name: time
data_categories:
- user.sensor
- name: order_item
fields:
- name: order_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: orders.id
direction: from
- name: product_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: product.id
direction: to
- name: quantity
data_categories:
- system.operations
- name: orders
fields:
- name: customer_id
data_categories:
- user.unique_id
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories:
- system.operations
fides_meta:
primary_key: true
- name: shipping_address_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: address.id
direction: to
- name: payment_card
fields:
- name: billing_address_id
data_categories:
- system.operations
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: address.id
direction: to
- name: ccn
data_categories:
- user.financial.bank_account
- name: code
data_categories:
- user.financial
- name: customer_id
data_categories:
- user.unique_id
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories:
- system.operations
- name: name
data_categories:
- user.financial
- name: preferred
data_categories:
- user
- name: product
fields:
- name: id
data_categories:
- system.operations
- name: name
data_categories:
- system.operations
- name: price
data_categories:
- system.operations
- name: report
fields:
- name: email
data_categories:
- user.contact.email
fides_meta:
identity: email
data_type: string
- name: id
data_categories:
- system.operations
- name: month
data_categories:
- system.operations
- name: name
data_categories:
- system.operations
- name: total_visits
data_categories:
- system.operations
- name: year
data_categories:
- system.operations
- name: service_request
fields:
- name: alt_email
data_categories:
- user.contact.email
fides_meta:
identity: email
data_type: string
- name: closed
data_categories:
- system.operations
- name: email
data_categories:
- system.operations
fides_meta:
identity: email
data_type: string
- name: employee_id
data_categories:
- user.unique_id
fides_meta:
references:
- dataset: google_cloud_sql_postgres_example_test_dataset
field: employee.id
direction: from
- name: id
data_categories:
- system.operations
- name: opened
data_categories:
- system.operations
- name: visit
fields:
- name: email
data_categories:
- user.contact.email
fides_meta:
identity: email
data_type: string
- name: last_visit
data_categories:
- system.operations

0 comments on commit df9aecd

Please sign in to comment.