Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(airflow): fixed issue with _is_memory_dataset #1011

Merged
merged 25 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
32fdf69
build(datasets): Release 6.0.0 (#968)
ravi-kumar-pilla Dec 18, 2024
ef05d23
chore(datasets): Remove tracking datasets which are used in Kedro Viz…
ravi-kumar-pilla Jan 8, 2025
778811c
docs(datasets): Move to linkcode extension (#985)
ankatiyar Jan 13, 2025
5a6b100
fix(datasets): Fix polars.CSVDataset `save` on Windows (#979)
ravi-kumar-pilla Jan 13, 2025
377cf97
feat(all): Replace trufflehog with detect-secrets (#983)
ElenaKhaustova Jan 13, 2025
7ed49f9
build(datasets): use intersphinx over type_targets (#801)
deepyaman Jan 15, 2025
fb9a099
fix(datasets): Add parameter to enable/disable lazy saving for `Parti…
ElenaKhaustova Jan 22, 2025
4a15b80
fix(datasets): use kwarg for Ibis `read_*` methods (#1005)
deepyaman Feb 10, 2025
e6b22c3
build(datasets): pin PyArrow until `19.0.1` is out (#1006)
deepyaman Feb 10, 2025
0fed73c
build(datasets): update list of extras for Ibis 10 (#1003)
deepyaman Feb 10, 2025
a652129
chore: remove internal devtools from release notes (#1004)
deepyaman Feb 10, 2025
c384f0c
998: Fixed case where MemoryDatasets in catalog wouldn't trigger `_is…
CF-FHB-X Feb 13, 2025
b9005e6
998: Tests to ensure that MemoryDatasets are passed in mocked data ca…
CF-FHB-X Feb 14, 2025
9fe874a
998: Changelog
CF-FHB-X Feb 14, 2025
4dd6619
998: Linting fixes
CF-FHB-X Feb 17, 2025
15a0e3c
build(datasets): update list of extras for Ibis 10 (#1003)
deepyaman Feb 10, 2025
6060e67
chore: remove internal devtools from release notes (#1004)
deepyaman Feb 10, 2025
e49f25d
998: Tests to ensure that MemoryDatasets are passed in mocked data ca…
CF-FHB-X Feb 14, 2025
cffbaa2
998: Linting fixes
CF-FHB-X Feb 17, 2025
b07b8cb
998: Changed function according to PR comments
CF-FHB-X Feb 18, 2025
2ec88b0
998: Tests to ensure that MemoryDatasets are passed in mocked data ca…
CF-FHB-X Feb 14, 2025
69aad57
998: Linting fixes
CF-FHB-X Feb 17, 2025
fbb8081
998: Tweaked release
CF-FHB-X Feb 18, 2025
4ed1e4f
Merge branch 'main' of github.com:/CF-FHB-X/kedro-plugins into fix/99…
CF-FHB-X Feb 18, 2025
8eb9beb
Update RELEASE.md
CF-FHB-X Feb 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions kedro-airflow/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Upcoming Release
* Fixed the check for whether a dataset is a `MemoryDataset`, so that a `MemoryDataset` explicitly registered in the catalog is now detected as well.

# Release 0.9.2
* Removed support for Python 3.8
Expand Down
9 changes: 5 additions & 4 deletions kedro-airflow/kedro_airflow/grouping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from kedro.io import DataCatalog
from kedro.io import DataCatalog, MemoryDataset
from kedro.pipeline.node import Node
from kedro.pipeline.pipeline import Pipeline

Expand All @@ -11,9 +11,10 @@


def _is_memory_dataset(catalog, dataset_name: str) -> bool:
if dataset_name not in catalog:
return True
return False
"""Return whether a dataset is a MemoryDataset or not."""
return dataset_name not in catalog or isinstance(
catalog._get_dataset(dataset_name), MemoryDataset
)


def get_memory_datasets(
Expand Down
24 changes: 20 additions & 4 deletions kedro-airflow/tests/test_node_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any

import pytest
from kedro.io import AbstractDataset, DataCatalog
from kedro.io import AbstractDataset, DataCatalog, MemoryDataset
from kedro.pipeline import Pipeline, node
from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline

Expand All @@ -21,12 +21,16 @@ def _load(self):
return []


def mock_data_catalog(nodes: list[str], memory_nodes: set[str]) -> DataCatalog:
def mock_data_catalog(
nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False
) -> DataCatalog:
mock_catalog = DataCatalog()
for dataset_name in nodes:
if dataset_name not in memory_nodes:
dataset = TestDataset()
mock_catalog.add(dataset_name, dataset)
elif memory_nodes_in_catalog:
mock_catalog.add(dataset_name, MemoryDataset())

return mock_catalog

Expand Down Expand Up @@ -143,8 +147,20 @@ def test_group_memory_nodes(
),
],
)
def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str]):
mock_catalog = mock_data_catalog(nodes, memory_nodes)
@pytest.mark.parametrize("memory_nodes_in_catalog", (True, False))
def test_is_memory_dataset(
nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool
):
"""Tests for the `_is_memory_dataset` function.

Args:
nodes: list of nodes to add to the catalog
memory_nodes: set of nodes which should be considered MemoryDatasets
memory_nodes_in_catalog: whether to add MemoryDatasets to the catalog or not
"""
mock_catalog = mock_data_catalog(
nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog
)
for node_name in nodes:
if node_name in memory_nodes:
assert _is_memory_dataset(mock_catalog, node_name)
Expand Down