From 62c868d780a08a2d03066d16a066b22fd108b45e Mon Sep 17 00:00:00 2001 From: Richard Date: Thu, 13 Feb 2025 13:33:21 -0500 Subject: [PATCH 1/4] 998: Fixed case where MemoryDatasets in catalog wouldn't trigger `_is_memory_dataset` Signed-off-by: Richard Asselin --- kedro-airflow/kedro_airflow/grouping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kedro-airflow/kedro_airflow/grouping.py b/kedro-airflow/kedro_airflow/grouping.py index 3890804ae..31151e6d2 100644 --- a/kedro-airflow/kedro_airflow/grouping.py +++ b/kedro-airflow/kedro_airflow/grouping.py @@ -1,6 +1,6 @@ from __future__ import annotations -from kedro.io import DataCatalog +from kedro.io import DataCatalog, MemoryDataset from kedro.pipeline.node import Node from kedro.pipeline.pipeline import Pipeline @@ -11,9 +11,11 @@ def _is_memory_dataset(catalog, dataset_name: str) -> bool: + """Return whether a dataset is a MemoryDataset or not.""" if dataset_name not in catalog: return True - return False + else: + return isinstance(catalog.datasets[dataset_name], MemoryDataset) def get_memory_datasets( From a1bfd37d0a0f518a3d6540a98c6f3d31db36255c Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 09:47:26 -0500 Subject: [PATCH 2/4] 998: Tests to ensure that MemoryDatasets are passed in mocked data catalog Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index 84f551545..e6b7faccb 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -3,7 +3,7 @@ from typing import Any import pytest -from kedro.io import AbstractDataset, DataCatalog +from kedro.io import AbstractDataset, DataCatalog, MemoryDataset from kedro.pipeline import Pipeline, node from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline @@ -21,12 +21,15 @@ def _load(self): return [] -def mock_data_catalog(nodes: list[str], memory_nodes: set[str]) -> DataCatalog: +def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False) -> DataCatalog: mock_catalog = DataCatalog() for dataset_name in nodes: if dataset_name not in memory_nodes: dataset = TestDataset() mock_catalog.add(dataset_name, dataset) + elif memory_nodes_in_catalog: + mock_catalog.add(dataset_name, MemoryDataset()) + return mock_catalog @@ -143,8 +146,16 @@ def test_group_memory_nodes( ), ], ) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str]): - mock_catalog = mock_data_catalog(nodes, memory_nodes) +@pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) +def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): + """Tests for the `_is_memory_dataset` function. + + Args: + nodes: list of nodes to add to the catalog + memory_nodes: set of nodes which should be considered MemoryDatasets + memory_nodes_in_catalog: whether to add MemoryDatasets to the catalog or not + """ + mock_catalog = mock_data_catalog(nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog) for node_name in nodes: if node_name in memory_nodes: assert _is_memory_dataset(mock_catalog, node_name) From 5dbdb5f7a881e49a9c36430b27ff4f6f6b11582c Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 10:36:43 -0500 Subject: [PATCH 3/4] 998: Changelog Signed-off-by: Richard Asselin --- kedro-airflow/RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 6bd0b7163..72032b0e1 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming Release +* Fixed case where MemoryDatasets in catalog wouldn't be collapsed correctly # Release 0.9.2 * Removed support for Python 3.8 From 9bb91eda4b88a788142b9eaa69323f182d2941ec Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 17 Feb 2025 06:57:32 -0500 Subject: [PATCH 4/4] 998: Linting fixes Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index e6b7faccb..aa0b3c5f0 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -21,7 +21,9 @@ def _load(self): return [] -def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False) -> DataCatalog: +def mock_data_catalog( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False +) -> DataCatalog: mock_catalog = DataCatalog() for dataset_name in nodes: if dataset_name not in memory_nodes: @@ -30,7 +32,6 @@ def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_ elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) - return mock_catalog @@ -147,7 +148,9 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): +def test_is_memory_dataset( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool +): """Tests for the `_is_memory_dataset` function. Args: @@ -155,7 +158,9 @@ def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_node memory_nodes: set of nodes which should be considered MemoryDatasets memory_nodes_in_catalog: whether to add MemoryDatasets to the catalog or not """ - mock_catalog = mock_data_catalog(nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog) + mock_catalog = mock_data_catalog( + nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog + ) for node_name in nodes: if node_name in memory_nodes: assert _is_memory_dataset(mock_catalog, node_name)