From 32fdf69ba589ae1a563385de8ed64d7558892180 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Wed, 18 Dec 2024 10:01:50 -0600 Subject: [PATCH 01/24] build(datasets): Release 6.0.0 (#968) release draft Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 8 +++++++- kedro-datasets/kedro_datasets/__init__.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index b48bcce39..a477dca5e 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,4 +1,10 @@ -# Upcoming Release 6.0.0 +# Upcoming Release +## Major features and improvements +## Bug fixes and other changes +## Breaking Changes +## Community contributions + +# Release 6.0.0 ## Major features and improvements diff --git a/kedro-datasets/kedro_datasets/__init__.py b/kedro-datasets/kedro_datasets/__init__.py index 44692e803..94379814c 100644 --- a/kedro-datasets/kedro_datasets/__init__.py +++ b/kedro-datasets/kedro_datasets/__init__.py @@ -1,7 +1,7 @@ """``kedro_datasets`` is where you can find all of Kedro's data connectors.""" __all__ = ["KedroDeprecationWarning"] -__version__ = "5.1.0" +__version__ = "6.0.0" import sys import warnings From ef05d2332f332db2a39b2189734a9550c4cec887 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Tue, 7 Jan 2025 18:48:56 -0600 Subject: [PATCH 02/24] chore(datasets): Remove tracking datasets which are used in Kedro Viz Experiment Tracking (#969) * remove et related kedro datasets * update release note and static json schema * temporary doc fix Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 4 + .../docs/source/api/kedro_datasets.rst | 2 - kedro-datasets/kedro_datasets/_typing.py | 5 - .../kedro_datasets/dask/csv_dataset.py | 4 +- .../kedro_datasets/dask/parquet_dataset.py | 4 +- .../kedro_datasets/tracking/__init__.py | 26 --- .../kedro_datasets/tracking/json_dataset.py | 56 ----- .../tracking/metrics_dataset.py | 76 ------- kedro-datasets/pyproject.toml | 4 - .../static/jsonschema/kedro-catalog-0.18.json | 72 ------- .../static/jsonschema/kedro-catalog-0.19.json | 72 ------- kedro-datasets/tests/tracking/__init__.py | 0 .../tests/tracking/test_json_dataset.py | 195 ----------------- .../tests/tracking/test_metrics_dataset.py | 204 ------------------ 14 files changed, 8 insertions(+), 716 deletions(-) delete mode 100644 kedro-datasets/kedro_datasets/tracking/__init__.py delete mode 100644 kedro-datasets/kedro_datasets/tracking/json_dataset.py delete mode 100644 kedro-datasets/kedro_datasets/tracking/metrics_dataset.py delete mode 100644 kedro-datasets/tests/tracking/__init__.py delete mode 100644 kedro-datasets/tests/tracking/test_json_dataset.py delete mode 100644 kedro-datasets/tests/tracking/test_metrics_dataset.py diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index a477dca5e..16fa5b18a 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,7 +1,11 @@ # Upcoming Release ## Major features and improvements ## Bug fixes and other changes + ## Breaking Changes + +- Removed `tracking.MetricsDataset` and `tracking.JSONDataset` + ## Community contributions # Release 6.0.0 diff --git a/kedro-datasets/docs/source/api/kedro_datasets.rst b/kedro-datasets/docs/source/api/kedro_datasets.rst index 0cbd3bc4e..63142220a 100644 --- a/kedro-datasets/docs/source/api/kedro_datasets.rst +++ b/kedro-datasets/docs/source/api/kedro_datasets.rst @@ -62,6 +62,4 @@ kedro_datasets svmlight.SVMLightDataset tensorflow.TensorFlowModelDataset text.TextDataset - 
tracking.JSONDataset - tracking.MetricsDataset yaml.YAMLDataset diff --git a/kedro-datasets/kedro_datasets/_typing.py b/kedro-datasets/kedro_datasets/_typing.py index feb6d91b7..aa083f514 100644 --- a/kedro-datasets/kedro_datasets/_typing.py +++ b/kedro-datasets/kedro_datasets/_typing.py @@ -9,8 +9,3 @@ ImagePreview = NewType("ImagePreview", str) PlotlyPreview = NewType("PlotlyPreview", dict) JSONPreview = NewType("JSONPreview", str) - - -# experiment tracking datasets types -MetricsTrackingPreview = NewType("MetricsTrackingPreview", dict) -JSONTrackingPreview = NewType("JSONTrackingPreview", dict) diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index 053da6b00..bc5b5764b 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -67,9 +67,9 @@ def __init__( # noqa: PLR0913 filepath: Filepath in POSIX format to a CSV file CSV collection or the directory of a multipart CSV. load_args: Additional loading options `dask.dataframe.read_csv`: - https://docs.dask.org/en/latest/generated/dask.dataframe.read_csv.html + https://docs.dask.org/en/stable/generated/dask.dataframe.read_csv.html save_args: Additional saving options for `dask.dataframe.to_csv`: - https://docs.dask.org/en/latest/generated/dask.dataframe.to_csv.html + https://docs.dask.org/en/stable/generated/dask.dataframe.to_csv.html credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. fs_args: Optional parameters to the backend file system driver: diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 1acfe7cda..3b2dff73e 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -97,9 +97,9 @@ def __init__( # noqa: PLR0913 filepath: Filepath in POSIX format to a parquet file parquet collection or the directory of a multipart parquet. load_args: Additional loading options `dask.dataframe.read_parquet`: - https://docs.dask.org/en/latest/generated/dask.dataframe.read_parquet.html + https://docs.dask.org/en/stable/generated/dask.dataframe.read_parquet.html save_args: Additional saving options for `dask.dataframe.to_parquet`: - https://docs.dask.org/en/latest/generated/dask.dataframe.to_parquet.html + https://docs.dask.org/en/stable/generated/dask.dataframe.to_parquet.html credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. 
fs_args: Optional parameters to the backend file system driver: diff --git a/kedro-datasets/kedro_datasets/tracking/__init__.py b/kedro-datasets/kedro_datasets/tracking/__init__.py deleted file mode 100644 index 1b1a5c70d..000000000 --- a/kedro-datasets/kedro_datasets/tracking/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Dataset implementations to save data for Kedro Experiment Tracking.""" - -import warnings -from typing import Any - -import lazy_loader as lazy - -from kedro_datasets import KedroDeprecationWarning - -# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901 -JSONDataset: Any -MetricsDataset: Any - -__getattr__, __dir__, __all__ = lazy.attach( - __name__, - submod_attrs={ - "json_dataset": ["JSONDataset"], - "metrics_dataset": ["MetricsDataset"], - }, -) - -warnings.warn( - "`tracking.JSONDataset` and `tracking.MetricsDataset` are deprecated. These datasets will be removed in kedro-datasets 7.0.0", - KedroDeprecationWarning, - stacklevel=2, -) diff --git a/kedro-datasets/kedro_datasets/tracking/json_dataset.py b/kedro-datasets/kedro_datasets/tracking/json_dataset.py deleted file mode 100644 index d73df1b10..000000000 --- a/kedro-datasets/kedro_datasets/tracking/json_dataset.py +++ /dev/null @@ -1,56 +0,0 @@ -"""``JSONDataset`` saves data to a JSON file using an underlying -filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. -The ``JSONDataset`` is part of Kedro Experiment Tracking. The dataset is versioned by default. -""" - -import json -from typing import NoReturn - -from kedro.io.core import DatasetError, get_filepath_str - -from kedro_datasets._typing import JSONTrackingPreview -from kedro_datasets.json import json_dataset - - -class JSONDataset(json_dataset.JSONDataset): - """``JSONDataset`` saves data to a JSON file using an underlying - filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. - The ``JSONDataset`` is part of Kedro Experiment Tracking. - The dataset is write-only and it is versioned by default. - - Example usage for the - `YAML API `_: - - .. code-block:: yaml - - cars: - type: tracking.JSONDataset - filepath: data/09_tracking/cars.json - - Example usage for the - `Python API `_: - - .. code-block:: pycon - - >>> from kedro_datasets.tracking import JSONDataset - >>> - >>> data = {"col1": 1, "col2": 0.23, "col3": 0.002} - >>> - >>> dataset = JSONDataset(filepath=tmp_path / "test.json") - >>> dataset.save(data) - - """ - - versioned = True - - def load(self) -> NoReturn: - raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'") - - def preview(self) -> JSONTrackingPreview: # type: ignore[override] - "Load the JSON tracking dataset used in Kedro-viz experiment tracking." - load_path = get_filepath_str(self._get_load_path(), self._protocol) - - with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: - return JSONTrackingPreview(json.load(fs_file)) diff --git a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py b/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py deleted file mode 100644 index 6202acf34..000000000 --- a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py +++ /dev/null @@ -1,76 +0,0 @@ -"""``MetricsDataset`` saves data to a JSON file using an underlying -filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. -The ``MetricsDataset`` is part of Kedro Experiment Tracking. The dataset is versioned by default -and only takes metrics of numeric values. 
-""" - -import json -from typing import NoReturn - -from kedro.io.core import DatasetError, get_filepath_str - -from kedro_datasets._typing import MetricsTrackingPreview -from kedro_datasets.json import json_dataset - - -class MetricsDataset(json_dataset.JSONDataset): - """``MetricsDataset`` saves data to a JSON file using an underlying - filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file. The - ``MetricsDataset`` is part of Kedro Experiment Tracking. The dataset is write-only, - it is versioned by default and only takes metrics of numeric values. - - Example usage for the - `YAML API `_: - - .. code-block:: yaml - - cars: - type: tracking.MetricsDataset - filepath: data/09_tracking/cars.json - - Example usage for the - `Python API `_: - - .. code-block:: pycon - - >>> from kedro_datasets.tracking import MetricsDataset - >>> - >>> data = {"col1": 1, "col2": 0.23, "col3": 0.002} - >>> - >>> dataset = MetricsDataset(filepath=tmp_path / "test.json") - >>> dataset.save(data) - - """ - - versioned = True - - def load(self) -> NoReturn: - raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'") - - def save(self, data: dict[str, float]) -> None: - """Converts all values in the data from a ``MetricsDataset`` to float to make sure - they are numeric values which can be displayed in Kedro Viz and then saves the dataset. - """ - try: - for key, value in data.items(): - data[key] = float(value) - except ValueError as exc: - raise DatasetError( - f"The MetricsDataset expects only numeric values. {exc}" - ) from exc - - save_path = get_filepath_str(self._get_save_path(), self._protocol) - - with self._fs.open(save_path, **self._fs_open_args_save) as fs_file: - json.dump(data, fs_file, **self._save_args) - - self._invalidate_cache() - - def preview(self) -> MetricsTrackingPreview: # type: ignore[override] - "Load the Metrics tracking dataset used in Kedro-viz experiment tracking" - load_path = get_filepath_str(self._get_load_path(), self._protocol) - - with self._fs.open(load_path, **self._fs_open_args_load) as fs_file: - return json.load(fs_file) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 91b938c19..3ee8eb9e9 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -163,10 +163,6 @@ tensorflow = ["kedro-datasets[tensorflow-tensorflowmodeldataset]"] text-textdataset = [] text = ["kedro-datasets[text-textdataset]"] -tracking-jsondataset = [] -tracking-metricsdataset = [] -tracking = ["kedro-datasets[tracking-jsondataset, tracking-metricsdataset]"] - yaml-yamldataset = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"] yaml = ["kedro-datasets[yaml-yamldataset]"] diff --git a/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json b/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json index 195f0234a..b9fa61d14 100644 --- a/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json +++ b/kedro-datasets/static/jsonschema/kedro-catalog-0.18.json @@ -42,8 +42,6 @@ "spark.SparkJDBCDataSet", "tensorflow.TensorFlowModelDataset", "text.TextDataSet", - "tracking.JSONDataSet", - "tracking.MetricsDataSet", "yaml.YAMLDataSet" ] } @@ -1312,76 +1310,6 @@ } } }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) 
will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." 
- } - } - } - }, { "if": { "properties": { diff --git a/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json b/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json index f19266812..087725710 100644 --- a/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json +++ b/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json @@ -41,8 +41,6 @@ "spark.SparkJDBCDataset", "tensorflow.TensorFlowModelDataset", "text.TextDataset", - "tracking.JSONDataset", - "tracking.MetricsDataset", "yaml.YAMLDataset" ] } @@ -1277,76 +1275,6 @@ } } }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, { "if": { "properties": { diff --git a/kedro-datasets/tests/tracking/__init__.py b/kedro-datasets/tests/tracking/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/kedro-datasets/tests/tracking/test_json_dataset.py b/kedro-datasets/tests/tracking/test_json_dataset.py deleted file mode 100644 index de24ba9b9..000000000 --- a/kedro-datasets/tests/tracking/test_json_dataset.py +++ /dev/null @@ -1,195 +0,0 @@ -import inspect -import json -from pathlib import Path, PurePosixPath - -import pytest -from fsspec.implementations.local import LocalFileSystem -from gcsfs import GCSFileSystem -from kedro.io.core import PROTOCOL_DELIMITER, DatasetError, Version -from s3fs.core import S3FileSystem - -from kedro_datasets.tracking import JSONDataset - - -@pytest.fixture -def filepath_json(tmp_path): - return (tmp_path / "test.json").as_posix() - - -@pytest.fixture -def json_dataset(filepath_json, save_args, fs_args): - return JSONDataset(filepath=filepath_json, save_args=save_args, fs_args=fs_args) - - -@pytest.fixture -def explicit_versioned_json_dataset(filepath_json, load_version, save_version): - return JSONDataset( - filepath=filepath_json, version=Version(load_version, save_version) - ) - - -@pytest.fixture -def dummy_data(): - return {"col1": 1, "col2": 2, "col3": "mystring"} - - -class TestJSONDataset: - def test_save(self, filepath_json, dummy_data, tmp_path, save_version): - """Test saving and reloading the dataset.""" - json_dataset = JSONDataset( - filepath=filepath_json, version=Version(None, save_version) - ) - json_dataset.save(dummy_data) - - actual_filepath = Path(json_dataset._filepath.as_posix()) - test_filepath = tmp_path / "locally_saved.json" - - test_filepath.parent.mkdir(parents=True, exist_ok=True) - with open(test_filepath, "w", encoding="utf-8") as file: - json.dump(dummy_data, file) - - with open(test_filepath, encoding="utf-8") as file: - test_data = json.load(file) - - with open( - (actual_filepath / save_version / "test.json"), encoding="utf-8" - ) as actual_file: - actual_data = json.load(actual_file) - - assert actual_data == test_data - assert json_dataset._fs_open_args_load == {} - assert json_dataset._fs_open_args_save == {"mode": "w"} - - def test_load_fail(self, json_dataset, dummy_data): - json_dataset.save(dummy_data) - pattern = r"Loading not supported for 'JSONDataset'" - with pytest.raises(DatasetError, match=pattern): - json_dataset.load() - - def test_exists(self, json_dataset, dummy_data): - """Test `exists` method invocation for both existing and - nonexistent dataset.""" - assert not json_dataset.exists() - json_dataset.save(dummy_data) - assert json_dataset.exists() - - @pytest.mark.parametrize( - "save_args", [{"k1": "v1", "index": "value"}], indirect=True - ) - def test_save_extra_params(self, json_dataset, save_args): - """Test overriding the default save arguments.""" - for key, value in save_args.items(): - assert json_dataset._save_args[key] == value - - @pytest.mark.parametrize( - "fs_args", - [{"open_args_load": {"mode": "rb", "compression": "gzip"}}], - indirect=True, - ) - def 
test_open_extra_args(self, json_dataset, fs_args): - assert json_dataset._fs_open_args_load == fs_args["open_args_load"] - assert json_dataset._fs_open_args_save == {"mode": "w"} # default unchanged - - @pytest.mark.parametrize( - "filepath,instance_type", - [ - ("s3://bucket/file.json", S3FileSystem), - ("file:///tmp/test.json", LocalFileSystem), - ("/tmp/test.json", LocalFileSystem), - ("gcs://bucket/file.json", GCSFileSystem), - ], - ) - def test_protocol_usage(self, filepath, instance_type): - dataset = JSONDataset(filepath=filepath) - assert isinstance(dataset._fs, instance_type) - - path = filepath.split(PROTOCOL_DELIMITER, 1)[-1] - - assert str(dataset._filepath) == path - assert isinstance(dataset._filepath, PurePosixPath) - - def test_catalog_release(self, mocker): - fs_mock = mocker.patch("fsspec.filesystem").return_value - filepath = "test.json" - dataset = JSONDataset(filepath=filepath) - dataset.release() - fs_mock.invalidate_cache.assert_called_once_with(filepath) - - def test_not_version_str_repr(self): - """Test that version is not in string representation of the class instance.""" - filepath = "test.json" - ds = JSONDataset(filepath=filepath) - - assert filepath in str(ds) - assert "version" not in str(ds) - assert "JSONDataset" in str(ds) - assert "protocol" in str(ds) - # Default save_args - assert "save_args={'indent': 2}" in str(ds) - - def test_version_str_repr(self, load_version, save_version): - """Test that version is in string representation of the class instance.""" - filepath = "test.json" - ds_versioned = JSONDataset( - filepath=filepath, version=Version(load_version, save_version) - ) - - assert filepath in str(ds_versioned) - ver_str = f"version=Version(load={load_version}, save='{save_version}')" - assert ver_str in str(ds_versioned) - assert "JSONDataset" in str(ds_versioned) - assert "protocol" in str(ds_versioned) - # Default save_args - assert "save_args={'indent': 2}" in str(ds_versioned) - - def test_prevent_overwrite(self, explicit_versioned_json_dataset, dummy_data): - """Check the error when attempting to override the dataset if the - corresponding json file for a given save version already exists.""" - explicit_versioned_json_dataset.save(dummy_data) - pattern = ( - r"Save path \'.+\' for JSONDataset\(.+\) must " - r"not exist if versioning is enabled\." - ) - with pytest.raises(DatasetError, match=pattern): - explicit_versioned_json_dataset.save(dummy_data) - - @pytest.mark.parametrize( - "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True - ) - @pytest.mark.parametrize( - "save_version", ["2019-01-02T00.00.00.000Z"], indirect=True - ) - def test_save_version_warning( - self, - explicit_versioned_json_dataset, - load_version, - save_version, - dummy_data, - ): - """Check the warning when saving to the path that differs from - the subsequent load path.""" - pattern = ( - f"Save version '{save_version}' did not match " - f"load version '{load_version}' for " - r"JSONDataset\(.+\)" - ) - with pytest.warns(UserWarning, match=pattern): - explicit_versioned_json_dataset.save(dummy_data) - - def test_http_filesystem_no_versioning(self): - pattern = "Versioning is not supported for HTTP protocols." 
- - with pytest.raises(DatasetError, match=pattern): - JSONDataset( - filepath="https://example.com/file.json", version=Version(None, None) - ) - - def test_preview(self, json_dataset, dummy_data): - expected_preview = {"col1": 1, "col2": 2, "col3": "mystring"} - json_dataset.save(dummy_data) - preview = json_dataset.preview() - assert preview == expected_preview - assert ( - inspect.signature(json_dataset.preview).return_annotation.__name__ - == "JSONTrackingPreview" - ) diff --git a/kedro-datasets/tests/tracking/test_metrics_dataset.py b/kedro-datasets/tests/tracking/test_metrics_dataset.py deleted file mode 100644 index b638fcdfd..000000000 --- a/kedro-datasets/tests/tracking/test_metrics_dataset.py +++ /dev/null @@ -1,204 +0,0 @@ -import inspect -import json -from pathlib import Path, PurePosixPath - -import pytest -from fsspec.implementations.local import LocalFileSystem -from gcsfs import GCSFileSystem -from kedro.io.core import PROTOCOL_DELIMITER, DatasetError, Version -from s3fs.core import S3FileSystem - -from kedro_datasets.tracking import MetricsDataset - - -@pytest.fixture -def filepath_json(tmp_path): - return (tmp_path / "test.json").as_posix() - - -@pytest.fixture -def metrics_dataset(filepath_json, save_args, fs_args): - return MetricsDataset(filepath=filepath_json, save_args=save_args, fs_args=fs_args) - - -@pytest.fixture -def explicit_versioned_metrics_dataset(filepath_json, load_version, save_version): - return MetricsDataset( - filepath=filepath_json, version=Version(load_version, save_version) - ) - - -@pytest.fixture -def dummy_data(): - return {"col1": 1, "col2": 2, "col3": 3} - - -class TestMetricsDataset: - def test_save_data( - self, - dummy_data, - tmp_path, - filepath_json, - save_version, - ): - """Test saving and reloading the dataset.""" - metrics_dataset = MetricsDataset( - filepath=filepath_json, version=Version(None, save_version) - ) - metrics_dataset.save(dummy_data) - - actual_filepath = Path(metrics_dataset._filepath.as_posix()) - test_filepath = tmp_path / "locally_saved.json" - - test_filepath.parent.mkdir(parents=True, exist_ok=True) - with open(test_filepath, "w", encoding="utf-8") as file: - json.dump(dummy_data, file) - - with open(test_filepath, encoding="utf-8") as file: - test_data = json.load(file) - - with open( - (actual_filepath / save_version / "test.json"), encoding="utf-8" - ) as actual_file: - actual_data = json.load(actual_file) - - assert actual_data == test_data - assert metrics_dataset._fs_open_args_load == {} - assert metrics_dataset._fs_open_args_save == {"mode": "w"} - - def test_load_fail(self, metrics_dataset, dummy_data): - metrics_dataset.save(dummy_data) - pattern = r"Loading not supported for 'MetricsDataset'" - with pytest.raises(DatasetError, match=pattern): - metrics_dataset.load() - - def test_exists(self, metrics_dataset, dummy_data): - """Test `exists` method invocation for both existing and - nonexistent dataset.""" - assert not metrics_dataset.exists() - metrics_dataset.save(dummy_data) - assert metrics_dataset.exists() - - @pytest.mark.parametrize( - "save_args", [{"k1": "v1", "index": "value"}], indirect=True - ) - def test_save_extra_params(self, metrics_dataset, save_args): - """Test overriding the default save arguments.""" - for key, value in save_args.items(): - assert metrics_dataset._save_args[key] == value - - @pytest.mark.parametrize( - "fs_args", - [{"open_args_load": {"mode": "rb", "compression": "gzip"}}], - indirect=True, - ) - def test_open_extra_args(self, metrics_dataset, fs_args): - assert 
metrics_dataset._fs_open_args_load == fs_args["open_args_load"] - assert metrics_dataset._fs_open_args_save == {"mode": "w"} # default unchanged - - @pytest.mark.parametrize( - "filepath,instance_type", - [ - ("s3://bucket/file.json", S3FileSystem), - ("file:///tmp/test.json", LocalFileSystem), - ("/tmp/test.json", LocalFileSystem), - ("gcs://bucket/file.json", GCSFileSystem), - ], - ) - def test_protocol_usage(self, filepath, instance_type): - dataset = MetricsDataset(filepath=filepath) - assert isinstance(dataset._fs, instance_type) - - path = filepath.split(PROTOCOL_DELIMITER, 1)[-1] - - assert str(dataset._filepath) == path - assert isinstance(dataset._filepath, PurePosixPath) - - def test_catalog_release(self, mocker): - fs_mock = mocker.patch("fsspec.filesystem").return_value - filepath = "test.json" - dataset = MetricsDataset(filepath=filepath) - dataset.release() - fs_mock.invalidate_cache.assert_called_once_with(filepath) - - def test_fail_on_saving_non_numeric_value(self, metrics_dataset): - data = {"col1": 1, "col2": 2, "col3": "hello"} - - pattern = "The MetricsDataset expects only numeric values." - with pytest.raises(DatasetError, match=pattern): - metrics_dataset.save(data) - - def test_not_version_str_repr(self): - """Test that version is not in string representation of the class instance.""" - filepath = "test.json" - ds = MetricsDataset(filepath=filepath) - - assert filepath in str(ds) - assert "version" not in str(ds) - assert "MetricsDataset" in str(ds) - assert "protocol" in str(ds) - # Default save_args - assert "save_args={'indent': 2}" in str(ds) - - def test_version_str_repr(self, load_version, save_version): - """Test that version is in string representation of the class instance.""" - filepath = "test.json" - ds_versioned = MetricsDataset( - filepath=filepath, version=Version(load_version, save_version) - ) - - assert filepath in str(ds_versioned) - ver_str = f"version=Version(load={load_version}, save='{save_version}')" - assert ver_str in str(ds_versioned) - assert "MetricsDataset" in str(ds_versioned) - assert "protocol" in str(ds_versioned) - # Default save_args - assert "save_args={'indent': 2}" in str(ds_versioned) - - def test_prevent_overwrite(self, explicit_versioned_metrics_dataset, dummy_data): - """Check the error when attempting to override the dataset if the - corresponding json file for a given save version already exists.""" - explicit_versioned_metrics_dataset.save(dummy_data) - pattern = ( - r"Save path \'.+\' for MetricsDataset\(.+\) must " - r"not exist if versioning is enabled\." - ) - with pytest.raises(DatasetError, match=pattern): - explicit_versioned_metrics_dataset.save(dummy_data) - - @pytest.mark.parametrize( - "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True - ) - @pytest.mark.parametrize( - "save_version", ["2019-01-02T00.00.00.000Z"], indirect=True - ) - def test_save_version_warning( - self, explicit_versioned_metrics_dataset, load_version, save_version, dummy_data - ): - """Check the warning when saving to the path that differs from - the subsequent load path.""" - pattern = ( - f"Save version '{save_version}' did not match " - f"load version '{load_version}' for " - r"MetricsDataset\(.+\)" - ) - with pytest.warns(UserWarning, match=pattern): - explicit_versioned_metrics_dataset.save(dummy_data) - - def test_http_filesystem_no_versioning(self): - pattern = "Versioning is not supported for HTTP protocols." 
- - with pytest.raises(DatasetError, match=pattern): - MetricsDataset( - filepath="https://example.com/file.json", version=Version(None, None) - ) - - def test_preview(self, metrics_dataset, dummy_data): - expected_preview = {"col1": 1, "col2": 2, "col3": 3} - metrics_dataset.save(dummy_data) - preview = metrics_dataset.preview() - assert preview == expected_preview - assert ( - inspect.signature(metrics_dataset.preview).return_annotation.__name__ - == "MetricsTrackingPreview" - ) From 778811c976ff871dde6640ae9b62a11884540859 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:15:32 +0000 Subject: [PATCH 03/24] docs(datasets): Move to linkcode extension (#985) Move to linkcode extension Signed-off-by: Ankita Katiyar Signed-off-by: Richard Asselin --- kedro-datasets/docs/source/conf.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py index f62e80104..039658936 100644 --- a/kedro-datasets/docs/source/conf.py +++ b/kedro-datasets/docs/source/conf.py @@ -14,6 +14,8 @@ from __future__ import annotations import importlib +import inspect +import os import re import sys from inspect import getmembers, isclass, isfunction @@ -22,6 +24,8 @@ from click import secho, style from kedro import __version__ as release +import kedro_datasets + # -- Project information ----------------------------------------------------- project = "kedro-datasets" @@ -47,7 +51,7 @@ "sphinx_autodoc_typehints", "sphinx.ext.doctest", "sphinx.ext.ifconfig", - "sphinx.ext.viewcode", + "sphinx.ext.linkcode", "sphinxcontrib.jquery", "sphinx_copybutton", "myst_parser", @@ -452,3 +456,25 @@ def setup(app): user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" myst_heading_anchors = 5 + +def linkcode_resolve(domain, info): + """Resolve a GitHub URL corresponding to a Python object.""" + if domain != 'py': + return None + + try: + mod = sys.modules[info['module']] + obj = mod + for attr in info['fullname'].split('.'): + obj = getattr(obj, attr) + obj = inspect.unwrap(obj) + + filename = inspect.getsourcefile(obj) + source, lineno = inspect.getsourcelines(obj) + relpath = os.path.relpath(filename, start=os.path.dirname( + kedro_datasets.__file__)) + + return f'https://github.com/kedro-org/kedro-plugins/blob/main/kedro-datasets/kedro_datasets/{relpath}#L{lineno}#L{lineno + len(source) - 1}' + + except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError): + return None From 5a6b100ff3ab2c557d119820b40f7963f6d385f2 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Mon, 13 Jan 2025 09:48:50 -0600 Subject: [PATCH 04/24] fix(datasets): Fix polars.CSVDataset `save` on Windows (#979) * test csv win Signed-off-by: ravi_kumar_pilla * change ci yaml for testing Signed-off-by: ravi_kumar_pilla * change ci yaml for testing Signed-off-by: ravi_kumar_pilla * add default encoding when opening file * revert workflow tests Signed-off-by: ravi_kumar_pilla * fix lint Signed-off-by: ravi_kumar_pilla * update release note * update release note --------- Signed-off-by: ravi_kumar_pilla Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 5 ++++- kedro-datasets/kedro_datasets/polars/csv_dataset.py | 4 +++- kedro-datasets/tests/polars/test_csv_dataset.py | 10 ---------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 16fa5b18a..27df63f78 
100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,10 +1,13 @@ # Upcoming Release ## Major features and improvements + ## Bug fixes and other changes +- Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. + ## Breaking Changes -- Removed `tracking.MetricsDataset` and `tracking.JSONDataset` +- Removed `tracking.MetricsDataset` and `tracking.JSONDataset`. ## Community contributions diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py index 6d8a988a5..9e6f35846 100644 --- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py @@ -72,7 +72,9 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]): DEFAULT_LOAD_ARGS: dict[str, Any] = {"rechunk": True} DEFAULT_SAVE_ARGS: dict[str, Any] = {} - DEFAULT_FS_ARGS: dict[str, Any] = {"open_args_save": {"mode": "w"}} + DEFAULT_FS_ARGS: dict[str, Any] = { + "open_args_save": {"mode": "w", "encoding": "utf-8"} + } def __init__( # noqa: PLR0913 self, diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py index e03f192cc..5312e9b48 100644 --- a/kedro-datasets/tests/polars/test_csv_dataset.py +++ b/kedro-datasets/tests/polars/test_csv_dataset.py @@ -88,14 +88,12 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame): class TestCSVDataset: - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_save_and_load(self, csv_dataset, dummy_dataframe): """Test saving and reloading the dataset.""" csv_dataset.save(dummy_dataframe) reloaded = csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and nonexistent dataset.""" @@ -204,7 +202,6 @@ def test_version_str_repr(self, load_version, save_version): assert "load_args={'rechunk': True}" in str(ds) assert "load_args={'rechunk': True}" in str(ds_versioned) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for the versioned dataset.""" @@ -212,7 +209,6 @@ def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_csv): """Test that if a new version is created mid-run, by an external system, it won't be loaded in the current run.""" @@ -236,7 +232,6 @@ def test_multiple_loads(self, versioned_csv_dataset, dummy_dataframe, filepath_c ds_new.resolve_load_version() == v_new ) # new version is discoverable by a new instance - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_multiple_saves(self, dummy_dataframe, filepath_csv): """Test multiple cycles of save followed by load for the same dataset""" ds_versioned = CSVDataset(filepath=filepath_csv, version=Version(None, None)) @@ -259,7 +254,6 @@ def test_multiple_saves(self, dummy_dataframe, filepath_csv): ds_new = CSVDataset(filepath=filepath_csv, version=Version(None, None)) assert ds_new.resolve_load_version() == 
second_load_version - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_release_instance_cache(self, dummy_dataframe, filepath_csv): """Test that cache invalidation does not affect other instances""" ds_a = CSVDataset(filepath=filepath_csv, version=Version(None, None)) @@ -288,14 +282,12 @@ def test_no_versions(self, versioned_csv_dataset): with pytest.raises(DatasetError, match=pattern): versioned_csv_dataset.load() - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_exists(self, versioned_csv_dataset, dummy_dataframe): """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): """Check the error when attempting to override the dataset if the corresponding CSV file for a given save version already exists.""" @@ -307,7 +299,6 @@ def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): with pytest.raises(DatasetError, match=pattern): versioned_csv_dataset.save(dummy_dataframe) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") @pytest.mark.parametrize( "load_version", ["2019-01-01T23.59.59.999Z"], indirect=True ) @@ -334,7 +325,6 @@ def test_http_filesystem_no_versioning(self): filepath="https://example.com/file.csv", version=Version(None, None) ) - @pytest.mark.xfail(sys.platform == "win32", reason="file encoding is not UTF-8") def test_versioning_existing_dataset( self, csv_dataset, versioned_csv_dataset, dummy_dataframe ): From 377cf97f2312f426ec6464be659f9fd0772b9736 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:55:09 +0000 Subject: [PATCH 05/24] feat(all): Replace trufflehog with detect-secrets (#983) * Removed trufflehog Signed-off-by: Elena Khaustova * Updated github actions per plugin Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Updated validate-pr check scopes Signed-off-by: Elena Khaustova * Updated lint command Signed-off-by: Elena Khaustova * Added key to trigger check Signed-off-by: Elena Khaustova * Updated GH action to track per plugin Signed-off-by: Elena Khaustova * Removed secret Signed-off-by: Elena Khaustova * Updated GH for kedro-datasets Signed-off-by: Elena Khaustova * Updated secrets baseline Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova Signed-off-by: Richard Asselin --- .github/workflows/detect-secrets.yml | 46 +++ .github/workflows/kedro-airflow.yml | 7 + .github/workflows/kedro-datasets.yml | 7 + .github/workflows/kedro-docker.yml | 7 + .github/workflows/kedro-telemetry.yml | 7 + .github/workflows/validate-pr-title.yaml | 1 + .pre-commit-config.yaml | 12 +- .secrets.baseline | 494 +++++++++++++++++++++++ Makefile | 5 +- kedro-airflow/RELEASE.md | 1 + kedro-airflow/pyproject.toml | 2 +- kedro-datasets/RELEASE.md | 2 + kedro-datasets/pyproject.toml | 2 +- kedro-docker/RELEASE.md | 1 + kedro-docker/pyproject.toml | 2 +- kedro-telemetry/RELEASE.md | 1 + kedro-telemetry/pyproject.toml | 2 +- trufflehog-ignore.txt | 3 - 18 files changed, 585 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/detect-secrets.yml create mode 100644 .secrets.baseline delete mode 100644 trufflehog-ignore.txt diff --git 
a/.github/workflows/detect-secrets.yml b/.github/workflows/detect-secrets.yml new file mode 100644 index 000000000..bd360b52b --- /dev/null +++ b/.github/workflows/detect-secrets.yml @@ -0,0 +1,46 @@ +name: Detect secrets on plugins + +on: + workflow_call: + inputs: + plugin: + type: string + os: + type: string + python-version: + type: string + +jobs: + detect-secrets: + defaults: + run: + shell: bash + runs-on: ${{ inputs.os }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Cache python packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{inputs.plugin}}-${{inputs.os}}-python-${{inputs.python-version}} + restore-keys: ${{inputs.plugin}} + - name: Install uv + run: | + python -m pip install "uv==0.2.21" + - name: Install dependencies + run: | + cd ${{ inputs.plugin }} + uv pip install --system "kedro @ git+https://github.com/kedro-org/kedro@main" + uv pip install --system "${{inputs.plugin}}[lint] @ ." + uv pip freeze --system + - name: Install pre-commit hooks + run: | + pre-commit install --install-hooks + pre-commit install --hook-type pre-push + - name: Scan all tracked files + run: git ls-files ":(glob)*" ${{ inputs.plugin }} -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline diff --git a/.github/workflows/kedro-airflow.yml b/.github/workflows/kedro-airflow.yml index 85e7ca62d..92c269ea2 100644 --- a/.github/workflows/kedro-airflow.yml +++ b/.github/workflows/kedro-airflow.yml @@ -46,3 +46,10 @@ jobs: plugin: kedro-airflow os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-airflow + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml index d5aae0282..010115b73 100644 --- a/.github/workflows/kedro-datasets.yml +++ b/.github/workflows/kedro-datasets.yml @@ -61,3 +61,10 @@ jobs: - name: Documentation check for kedro-datasets run: | make check-datasets-docs + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-datasets + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-docker.yml b/.github/workflows/kedro-docker.yml index 66783b3b5..16ffcbafe 100644 --- a/.github/workflows/kedro-docker.yml +++ b/.github/workflows/kedro-docker.yml @@ -46,3 +46,10 @@ jobs: plugin: kedro-docker os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-docker + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/kedro-telemetry.yml b/.github/workflows/kedro-telemetry.yml index 5584ac775..aac47914e 100644 --- a/.github/workflows/kedro-telemetry.yml +++ b/.github/workflows/kedro-telemetry.yml @@ -35,3 +35,10 @@ jobs: plugin: kedro-telemetry os: ubuntu-latest python-version: "3.11" + + detect-secrets: + uses: ./.github/workflows/detect-secrets.yml + with: + plugin: kedro-telemetry + os: ubuntu-latest + python-version: "3.11" diff --git a/.github/workflows/validate-pr-title.yaml b/.github/workflows/validate-pr-title.yaml index b6e6fc808..cb1e65327 100644 --- a/.github/workflows/validate-pr-title.yaml +++ b/.github/workflows/validate-pr-title.yaml @@ -19,5 +19,6 @@ jobs: datasets docker telemetry + all env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9f9706a34..9d2eb8de3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,6 +24,12 @@ repos: additional_dependencies: - black==22.12.0 + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: [ '--baseline', '.secrets.baseline' ] + - repo: local hooks: - id: ruff-kedro-datasets @@ -86,12 +92,6 @@ repos: pass_filenames: false entry: black kedro-telemetry/kedro_telemetry kedro-telemetry/tests - - id: secret_scan - name: "Secret scan" - language: system - pass_filenames: false - entry: make secret-scan - - id: bandit name: "Bandit security check" language: system diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 000000000..ce3799e06 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,494 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": { + "kedro-datasets/kedro_datasets/dask/parquet_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/dask/parquet_dataset.py", + "hashed_secret": "6e1d66a1596528c308e601c10aa0b92d53606ab9", + "is_verified": false, + "line_number": 71 + } + ], + "kedro-datasets/kedro_datasets/pandas/sql_dataset.py": [ + { + "type": "Basic Auth Credentials", + "filename": "kedro-datasets/kedro_datasets/pandas/sql_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 130 + }, + { + "type": "Secret Keyword", + "filename": 
"kedro-datasets/kedro_datasets/pandas/sql_dataset.py", + "hashed_secret": "e026e197bb77b12d16ab6986e068751f016d0ea5", + "is_verified": false, + "line_number": 382 + } + ], + "kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py", + "hashed_secret": "a761ce3a45d97e41840a788495e85a70d1bb3815", + "is_verified": false, + "line_number": 83 + } + ], + "kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 57 + } + ], + "kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py", + "hashed_secret": "b60d121b438a380c343d5ec3c2037564b82ffef3", + "is_verified": false, + "line_number": 44 + } + ], + "kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py", + "hashed_secret": "b60d121b438a380c343d5ec3c2037564b82ffef3", + "is_verified": false, + "line_number": 45 + } + ], + "kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 17 + } + ], + "kedro-datasets/kedro_datasets_experimental/tests/video/test_video_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/kedro_datasets_experimental/tests/video/test_video_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 16 + } + ], + "kedro-datasets/tests/dask/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 14 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 27 + } + ], + "kedro-datasets/tests/dask/test_parquet_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_parquet_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 16 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/dask/test_parquet_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 29 + } + ], + "kedro-datasets/tests/holoviews/test_holoviews_writer.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/holoviews/test_holoviews_writer.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 108 + } + ], + "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py", + "hashed_secret": "dc724af18fbdd4e59189f5fe768a5f8311527050", + "is_verified": false, + "line_number": 16 + }, + { + "type": 
"Secret Keyword", + "filename": "kedro-datasets/tests/matplotlib/test_matplotlib_writer.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 59 + } + ], + "kedro-datasets/tests/pandas/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 66 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 213 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 405 + } + ], + "kedro-datasets/tests/pandas/test_generic_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_generic_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 126 + } + ], + "kedro-datasets/tests/pandas/test_json_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_json_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 140 + } + ], + "kedro-datasets/tests/pandas/test_sql_dataset.py": [ + { + "type": "Basic Auth Credentials", + "filename": "kedro-datasets/tests/pandas/test_sql_dataset.py", + "hashed_secret": "46e3d772a1888eadff26c7ada47fd7502d796e07", + "is_verified": false, + "line_number": 19 + } + ], + "kedro-datasets/tests/pandas/test_xml_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/pandas/test_xml_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 117 + } + ], + "kedro-datasets/tests/partitions/test_incremental_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_incremental_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 440 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_incremental_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 460 + } + ], + "kedro-datasets/tests/partitions/test_partitioned_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "76f747de912e8682e29a23cb506dd5bf0de080d2", + "is_verified": false, + "line_number": 415 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "9027cc5a2c1321de60a2d71ccde6229d1152d6d3", + "is_verified": false, + "line_number": 416 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "5dcbdf371f181b9b7a41a4be7be70f8cbee67da7", + "is_verified": false, + "line_number": 452 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 503 + }, + { + "type": "Secret Keyword", + "filename": 
"kedro-datasets/tests/partitions/test_partitioned_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 523 + } + ], + "kedro-datasets/tests/plotly/test_html_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_html_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 70 + } + ], + "kedro-datasets/tests/plotly/test_json_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_json_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 83 + } + ], + "kedro-datasets/tests/plotly/test_plotly_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/plotly/test_plotly_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 81 + } + ], + "kedro-datasets/tests/polars/test_csv_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 65 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 159 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_csv_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 351 + } + ], + "kedro-datasets/tests/polars/test_eager_polars_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_eager_polars_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 126 + } + ], + "kedro-datasets/tests/polars/test_lazy_polars_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_lazy_polars_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 93 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/polars/test_lazy_polars_dataset.py", + "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", + "is_verified": false, + "line_number": 198 + } + ], + "kedro-datasets/tests/snowflake/test_snowpark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/snowflake/test_snowpark_dataset.py", + "hashed_secret": "1365dbfe676a193420ed7981184720b426ef2b7a", + "is_verified": false, + "line_number": 32 + } + ], + "kedro-datasets/tests/spark/test_spark_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 42 + } + ], + "kedro-datasets/tests/spark/test_spark_jdbc_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_jdbc_dataset.py", + "hashed_secret": "4f4fa638cf19a2919f12e0105085c123ca5c5172", + "is_verified": false, + "line_number": 15 + } + ], + "kedro-datasets/tests/spark/test_spark_streaming_dataset.py": [ + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_streaming_dataset.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + 
"is_verified": false, + "line_number": 17 + }, + { + "type": "Secret Keyword", + "filename": "kedro-datasets/tests/spark/test_spark_streaming_dataset.py", + "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", + "is_verified": false, + "line_number": 64 + } + ] + }, + "generated_at": "2025-01-13T16:27:46Z" +} diff --git a/Makefile b/Makefile index c7946d605..e8c8a4e08 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install-pip-setuptools: python -m pip install -U pip setuptools wheel lint: - pre-commit run -a --hook-stage manual ruff-$(plugin) && pre-commit run trailing-whitespace --all-files && pre-commit run end-of-file-fixer --all-files && pre-commit run check-yaml --all-files && pre-commit run check-added-large-files --all-files && pre-commit run check-case-conflict --all-files && pre-commit run check-merge-conflict --all-files && pre-commit run debug-statements --all-files && pre-commit run black-$(plugin) --all-files --hook-stage manual && pre-commit run secret_scan --all-files --hook-stage manual && pre-commit run bandit --all-files --hook-stage manual + pre-commit run -a --hook-stage manual ruff-$(plugin) && pre-commit run trailing-whitespace --all-files && pre-commit run end-of-file-fixer --all-files && pre-commit run check-yaml --all-files && pre-commit run check-added-large-files --all-files && pre-commit run check-case-conflict --all-files && pre-commit run check-merge-conflict --all-files && pre-commit run debug-statements --all-files && pre-commit run black-$(plugin) --all-files --hook-stage manual && pre-commit run bandit --all-files --hook-stage manual $(MAKE) mypy mypy: @@ -21,9 +21,6 @@ test: e2e-tests: cd $(plugin) && behave -secret-scan: - trufflehog --max_depth 1 --exclude_paths trufflehog-ignore.txt . - install-test-requirements: cd $(plugin) && uv pip install ".[test]" diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 6bd0b7163..348945ac9 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming Release +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.9.2 * Removed support for Python 3.8 diff --git a/kedro-airflow/pyproject.toml b/kedro-airflow/pyproject.toml index ec7563cdd..6ef8a8b40 100644 --- a/kedro-airflow/pyproject.toml +++ b/kedro-airflow/pyproject.toml @@ -38,9 +38,9 @@ test = [ lint = [ "bandit", "black~=22.0", + "detect-secrets~=1.5.0", "mypy~=1.0", "pre-commit>=2.9.2", - "trufflehog>=2.1.0, <3.0", "ruff~=0.0.290", # mypy requirements "types-PyYAML", diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 27df63f78..15c13da84 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,6 +1,8 @@ # Upcoming Release ## Major features and improvements +- Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. + ## Bug fixes and other changes - Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. 
diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 3ee8eb9e9..1fcde25c6 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -270,11 +270,11 @@ lint = [ "bandit>=1.6.2, <2.0", "blacken-docs==1.9.2", "black~=22.0", + "detect-secrets~=1.5.0", "import-linter[toml]==1.2.6", "mypy~=1.0", "pre-commit>=2.9.2", "ruff~=0.0.290", - "trufflehog~=2.1", # mypy related dependencies "types-cachetools", "types-PyYAML", diff --git a/kedro-docker/RELEASE.md b/kedro-docker/RELEASE.md index f81181579..b7bab9313 100644 --- a/kedro-docker/RELEASE.md +++ b/kedro-docker/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming Release +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.6.2 diff --git a/kedro-docker/pyproject.toml b/kedro-docker/pyproject.toml index 15c8d04fc..b669a0e2d 100644 --- a/kedro-docker/pyproject.toml +++ b/kedro-docker/pyproject.toml @@ -39,9 +39,9 @@ test = [ lint = [ "bandit", "black~=22.0", + "detect-secrets~=1.5.0", "mypy~=1.0", "pre-commit>=2.9.2", - "trufflehog>=2.1.0, <3.0", "ruff~=0.0.290", ] diff --git a/kedro-telemetry/RELEASE.md b/kedro-telemetry/RELEASE.md index df7bb603a..1b4fce80f 100644 --- a/kedro-telemetry/RELEASE.md +++ b/kedro-telemetry/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming release +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.6.2 * Removed support for Python 3.8 diff --git a/kedro-telemetry/pyproject.toml b/kedro-telemetry/pyproject.toml index 45f9d995d..1f43f2315 100644 --- a/kedro-telemetry/pyproject.toml +++ b/kedro-telemetry/pyproject.toml @@ -35,9 +35,9 @@ test = [ lint = [ "bandit>=1.6.2, <2.0", "black~=22.0", + "detect-secrets~=1.5.0", "mypy~=1.0", "pre-commit>=2.9.2", - "trufflehog>=2.1.0, <3.0", "ruff~=0.0.290", # mypy requirements "types-requests", diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt deleted file mode 100644 index 1929a2634..000000000 --- a/trufflehog-ignore.txt +++ /dev/null @@ -1,3 +0,0 @@ -kedro-telemetry/README.md -kedro-telemetry/RELEASE.md -kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py From 7ed49f96671aaad4bc042894f84fd92c13a28750 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 15 Jan 2025 07:43:25 -0700 Subject: [PATCH 06/24] build(datasets): use intersphinx over type_targets (#801) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Deepyaman Datta Signed-off-by: Juan Luis Cano Rodríguez Co-authored-by: Juan Luis Cano Rodríguez Signed-off-by: Richard Asselin --- kedro-datasets/docs/source/conf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py index 039658936..90c66bd83 100644 --- a/kedro-datasets/docs/source/conf.py +++ b/kedro-datasets/docs/source/conf.py @@ -102,6 +102,7 @@ intersphinx_mapping = { "kedro": ("https://docs.kedro.org/en/stable/", None), "python": ("https://docs.python.org/3.10/", None), + "requests": ("https://requests.readthedocs.io/en/stable/", None), } type_targets = { @@ -110,8 +111,6 @@ "kedro.io.AbstractDataset", "AbstractDataset", "kedro.io.core.Version", - "requests.auth.AuthBase", - "requests.models.Response", "google.oauth2.credentials.Credentials", "deltalake.table.Metadata", "DataCatalog", From fb9a0996fc9ba3b3c624e4b39c1b4478d4d00e1e Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:28:30 +0000 Subject: [PATCH 
07/24] fix(datasets): Add parameter to enable/disable lazy saving for `PartitionedDataset` (#978) * Replaced callable check Signed-off-by: Elena Khaustova * Updateds lazy_save test Signed-off-by: Elena Khaustova * Added test_callable_save Signed-off-by: Elena Khaustova * Fixed lint Signed-off-by: Elena Khaustova * Fixed docs links Signed-off-by: Elena Khaustova * Fixed all docs links Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Fixed all docs links Signed-off-by: Elena Khaustova * Fixed typo Signed-off-by: Elena Khaustova * Added argument to disable lazy saving Signed-off-by: Elena Khaustova * Removed save function argument Signed-off-by: Elena Khaustova * Updated unit test Signed-off-by: Elena Khaustova * Fixed lint Signed-off-by: Elena Khaustova * Updated related docs Signed-off-by: Elena Khaustova * Revert test changes Signed-off-by: Elena Khaustova * Updated baseline Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova * Updated docstrings Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova Signed-off-by: Richard Asselin --- .secrets.baseline | 14 +++++------ kedro-datasets/RELEASE.md | 1 + .../kedro_datasets/dask/csv_dataset.py | 5 ++-- .../kedro_datasets/dask/parquet_dataset.py | 5 ++-- .../partitions/partitioned_dataset.py | 10 +++++++- .../partitions/test_partitioned_dataset.py | 23 +++++++++++++++++++ 6 files changed, 46 insertions(+), 12 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index ce3799e06..c18f3f6f1 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -129,7 +129,7 @@ "filename": "kedro-datasets/kedro_datasets/dask/parquet_dataset.py", "hashed_secret": "6e1d66a1596528c308e601c10aa0b92d53606ab9", "is_verified": false, - "line_number": 71 + "line_number": 72 } ], "kedro-datasets/kedro_datasets/pandas/sql_dataset.py": [ @@ -340,35 +340,35 @@ "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "76f747de912e8682e29a23cb506dd5bf0de080d2", "is_verified": false, - "line_number": 415 + "line_number": 438 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "9027cc5a2c1321de60a2d71ccde6229d1152d6d3", "is_verified": false, - "line_number": 416 + "line_number": 439 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "5dcbdf371f181b9b7a41a4be7be70f8cbee67da7", "is_verified": false, - "line_number": 452 + "line_number": 475 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", "is_verified": false, - "line_number": 503 + "line_number": 526 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", "is_verified": false, - "line_number": 523 + "line_number": 546 } ], "kedro-datasets/tests/plotly/test_html_dataset.py": [ @@ -490,5 +490,5 @@ } ] }, - "generated_at": "2025-01-13T16:27:46Z" + "generated_at": "2025-01-15T15:25:24Z" } diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 15c13da84..820388766 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,6 +1,7 @@ # Upcoming Release ## Major features and improvements +- Added a parameter to enable/disable lazy saving for 
`PartitionedDataset`. - Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. ## Bug fixes and other changes diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index bc5b5764b..b82bff15e 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -1,5 +1,6 @@ """``CSVDataset`` is a dataset used to load and save data to CSV files using Dask dataframe""" + from __future__ import annotations from copy import deepcopy @@ -13,7 +14,7 @@ class CSVDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]): """``CSVDataset`` loads and saves data to comma-separated value file(s). It uses Dask remote data services to handle the corresponding load and save operations: - https://docs.dask.org/en/latest/how-to/connect-to-remote-data.html + https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html Example usage for the `YAML API `_: @@ -73,7 +74,7 @@ def __init__( # noqa: PLR0913 credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. fs_args: Optional parameters to the backend file system driver: - https://docs.dask.org/en/latest/how-to/connect-to-remote-data.html#optional-parameters + https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html#optional-parameters metadata: Any arbitrary metadata. This is ignored by Kedro, but may be consumed by users or external plugins. """ diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 3b2dff73e..b3a81c632 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -1,5 +1,6 @@ """``ParquetDataset`` is a dataset used to load and save data to parquet files using Dask dataframe""" + from __future__ import annotations from copy import deepcopy @@ -14,7 +15,7 @@ class ParquetDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]): """``ParquetDataset`` loads and saves data to parquet file(s). It uses Dask remote data services to handle the corresponding load and save operations: - https://docs.dask.org/en/latest/how-to/connect-to-remote-data.html + https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html Example usage for the `YAML API `_: @@ -103,7 +104,7 @@ def __init__( # noqa: PLR0913 credentials: Credentials required to get access to the underlying filesystem. E.g. for ``GCSFileSystem`` it should look like `{"token": None}`. fs_args: Optional parameters to the backend file system driver: - https://docs.dask.org/en/latest/how-to/connect-to-remote-data.html#optional-parameters + https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html#optional-parameters metadata: Any arbitrary metadata. This is ignored by Kedro, but may be consumed by users or external plugins. 
""" diff --git a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py index ea2461034..cf1069b1a 100644 --- a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py +++ b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py @@ -69,6 +69,7 @@ class PartitionedDataset(AbstractDataset[dict[str, Any], dict[str, Callable[[], sep: '\\t' index: true filename_suffix: '.dat' + save_lazily: True Example usage for the `Python API >> # This will create a folder `df_with_partition` and save multiple files >>> # with the dict key + filename_suffix as filename, i.e. 1.csv, 2.csv etc. @@ -152,6 +154,7 @@ def __init__( # noqa: PLR0913 load_args: dict[str, Any] | None = None, fs_args: dict[str, Any] | None = None, overwrite: bool = False, + save_lazily: bool = True, metadata: dict[str, Any] | None = None, ) -> None: """Creates a new instance of ``PartitionedDataset``. @@ -191,6 +194,10 @@ def __init__( # noqa: PLR0913 fs_args: Extra arguments to pass into underlying filesystem class constructor (e.g. `{"project": "my-project"}` for ``GCSFileSystem``). overwrite: If True, any existing partitions will be removed. + save_lazily: Parameter to enable/disable lazy saving, the default is True. Meaning that if callable object + is passed as data to save, the partition’s data will not be materialised until it is time to write. + Lazy saving example: + https://docs.kedro.org/en/stable/data/kedro_io.html#partitioned-dataset-lazy-saving metadata: Any arbitrary metadata. This is ignored by Kedro, but may be consumed by users or external plugins. @@ -206,6 +213,7 @@ def __init__( # noqa: PLR0913 self._overwrite = overwrite self._protocol = infer_storage_options(self._path)["protocol"] self._partition_cache: Cache = Cache(maxsize=1) + self._save_lazily = save_lazily self.metadata = metadata dataset = dataset if isinstance(dataset, dict) else {"type": dataset} @@ -311,7 +319,7 @@ def save(self, data: dict[str, Any]) -> None: # join the protocol back since tools like PySpark may rely on it kwargs[self._filepath_arg] = self._join_protocol(partition) dataset = self._dataset_type(**kwargs) # type: ignore - if callable(partition_data): + if callable(partition_data) and self._save_lazily: partition_data = partition_data() # noqa: PLW2901 dataset.save(partition_data) self._invalidate_caches() diff --git a/kedro-datasets/tests/partitions/test_partitioned_dataset.py b/kedro-datasets/tests/partitions/test_partitioned_dataset.py index f0126887d..9a49d3bb8 100644 --- a/kedro-datasets/tests/partitions/test_partitioned_dataset.py +++ b/kedro-datasets/tests/partitions/test_partitioned_dataset.py @@ -52,6 +52,10 @@ def filepath_csvs(tmp_path): ] +def original_data_callable(): + return pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + + class FakeDataset: # pylint: disable=too-few-public-methods pass @@ -101,6 +105,25 @@ def test_save(self, dataset, local_csvs, suffix): reloaded_data = loaded_partitions[part_id]() assert_frame_equal(reloaded_data, original_data) + @pytest.mark.parametrize("dataset", ["kedro_datasets.pickle.PickleDataset"]) + @pytest.mark.parametrize("suffix", ["", ".csv"]) + def test_callable_save(self, dataset, local_csvs, suffix): + pds = PartitionedDataset( + path=str(local_csvs), + dataset=dataset, + filename_suffix=suffix, + save_lazily=False, + ) + + part_id = "new/data" + pds.save({part_id: original_data_callable}) + + assert (local_csvs / "new" / ("data" + suffix)).is_file() + loaded_partitions = pds.load() + assert 
part_id in loaded_partitions + reloaded_data = loaded_partitions[part_id]() + assert reloaded_data == original_data_callable + @pytest.mark.parametrize("dataset", LOCAL_DATASET_DEFINITION) @pytest.mark.parametrize("suffix", ["", ".csv"]) def test_lazy_save(self, dataset, local_csvs, suffix): From 4a15b80e955ce49ced8fc63d693142f844597edd Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 08:39:25 -0700 Subject: [PATCH 08/24] fix(datasets): use kwarg for Ibis `read_*` methods (#1005) * fix(datasets): use kwarg for Ibis `read_*` methods Signed-off-by: Deepyaman Datta * Update RELEASE.md Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 1 + kedro-datasets/kedro_datasets/ibis/file_dataset.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 820388766..2d0480807 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -7,6 +7,7 @@ ## Bug fixes and other changes - Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. +- Made `table_name` a keyword argument in the `ibis.FileDataset` implementation to be compatible with Ibis 10.0. ## Breaking Changes diff --git a/kedro-datasets/kedro_datasets/ibis/file_dataset.py b/kedro-datasets/kedro_datasets/ibis/file_dataset.py index c3c43b74f..82ad0c29c 100644 --- a/kedro-datasets/kedro_datasets/ibis/file_dataset.py +++ b/kedro-datasets/kedro_datasets/ibis/file_dataset.py @@ -160,7 +160,7 @@ def connection(self) -> BaseBackend: def load(self) -> ir.Table: load_path = self._get_load_path() reader = getattr(self.connection, f"read_{self._file_format}") - return reader(load_path, self._table_name, **self._load_args) + return reader(load_path, table_name=self._table_name, **self._load_args) def save(self, data: ir.Table) -> None: save_path = self._get_save_path() From e6b22c3e9a7180a9a04cd677222483626e94d889 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 09:09:15 -0700 Subject: [PATCH 09/24] build(datasets): pin PyArrow until `19.0.1` is out (#1006) * build(datasets): pin PyArrow until `19.0.1` is out Signed-off-by: Deepyaman Datta * chore(datasets): exclude `19.0.0` instead of bound Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: Richard Asselin --- kedro-datasets/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 1fcde25c6..7adad9136 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -200,7 +200,7 @@ docs = [ # Test requirements test = [ - "accelerate<0.32", # Temporary pin + "accelerate<0.32", # Temporary pin "adlfs~=2023.1", "behave==1.2.6", "biopython~=1.73", @@ -236,6 +236,7 @@ test = [ "polars[deltalake,xlsx2csv]>=1.0", "pyarrow>=1.0; python_version < '3.11'", "pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors + "pyarrow!=19.0.0", # Temporary pin until https://github.com/apache/arrow/issues/45283 is fixed "pyodbc~=5.0", "pyspark>=3.0; python_version < '3.11'", "pyspark>=3.4; python_version >= '3.11'", From 0fed73ce47327b04c32ec2e8b2274e328a0ee1fe Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 09:31:02 -0700 Subject: [PATCH 10/24] build(datasets): update list of extras for Ibis 10 (#1003) * build(datasets): update list of 
extras for Ibis 10 Signed-off-by: Deepyaman Datta * Update RELEASE.md Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 4 +++- kedro-datasets/pyproject.toml | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 2d0480807..b480237f7 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,12 +1,14 @@ # Upcoming Release + ## Major features and improvements - Added a parameter to enable/disable lazy saving for `PartitionedDataset`. - Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. +- Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. ## Bug fixes and other changes -- Fix polars.CSVDataset `save` method on Windows using `utf-8` as default encoding. +- Fixed `polars.CSVDataset` `save` method on Windows using `utf-8` as default encoding. - Made `table_name` a keyword argument in the `ibis.FileDataset` implementation to be compatible with Ibis 10.0. ## Breaking Changes diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 7adad9136..0327eea93 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -40,7 +40,7 @@ dask = ["kedro-datasets[dask-parquetdataset, dask-csvdataset]"] databricks-managedtabledataset = ["kedro-datasets[hdfs-base,s3fs-base]"] databricks = ["kedro-datasets[databricks-managedtabledataset]"] -geopandas-genericdataset = ["geopandas>=0.8.0, <2.0", "fiona >=1.8, <2.0"] +geopandas-genericdataset = ["geopandas>=0.8.0, <2.0", "fiona>=1.8, <2.0"] geopandas = ["kedro-datasets[geopandas-genericdataset]"] holoviews-holoviewswriter = ["holoviews>=1.13.0"] @@ -50,9 +50,11 @@ huggingface-hfdataset = ["datasets", "huggingface_hub"] huggingface-hftransformerpipelinedataset = ["transformers"] huggingface = ["kedro-datasets[huggingface-hfdataset,huggingface-hftransformerpipelinedataset]"] +ibis-athena = ["ibis-framework[athena]"] ibis-bigquery = ["ibis-framework[bigquery]"] ibis-clickhouse = ["ibis-framework[clickhouse]"] -ibis-dask = ["ibis-framework[dask]"] +ibis-dask = ["ibis-framework[dask]<10.0"] +ibis-databricks = ["ibis-framework[databricks]"] ibis-datafusion = ["ibis-framework[datafusion]"] ibis-druid = ["ibis-framework[druid]"] ibis-duckdb = ["ibis-framework[duckdb]"] @@ -62,7 +64,7 @@ ibis-impala = ["ibis-framework[impala]"] ibis-mssql = ["ibis-framework[mssql]"] ibis-mysql = ["ibis-framework[mysql]"] ibis-oracle = ["ibis-framework[oracle]"] -ibis-pandas = ["ibis-framework[pandas]"] +ibis-pandas = ["ibis-framework[pandas]<10.0"] ibis-polars = ["ibis-framework[polars]"] ibis-postgres = ["ibis-framework[postgres]"] ibis-pyspark = ["ibis-framework[pyspark]"] From a652129601ef7cb718f775076748a0c5559ab90c Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 14:19:53 -0700 Subject: [PATCH 11/24] chore: remove internal devtools from release notes (#1004) * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: Richard Asselin --- kedro-airflow/RELEASE.md | 1 - 
kedro-datasets/RELEASE.md | 1 - kedro-docker/RELEASE.md | 1 - kedro-telemetry/RELEASE.md | 1 - 4 files changed, 4 deletions(-) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 348945ac9..6bd0b7163 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,5 +1,4 @@ # Upcoming Release -* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.9.2 * Removed support for Python 3.8 diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index b480237f7..dd24582de 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -3,7 +3,6 @@ ## Major features and improvements - Added a parameter to enable/disable lazy saving for `PartitionedDataset`. -- Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. - Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. ## Bug fixes and other changes diff --git a/kedro-docker/RELEASE.md b/kedro-docker/RELEASE.md index b7bab9313..f81181579 100644 --- a/kedro-docker/RELEASE.md +++ b/kedro-docker/RELEASE.md @@ -1,5 +1,4 @@ # Upcoming Release -* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.6.2 diff --git a/kedro-telemetry/RELEASE.md b/kedro-telemetry/RELEASE.md index 1b4fce80f..df7bb603a 100644 --- a/kedro-telemetry/RELEASE.md +++ b/kedro-telemetry/RELEASE.md @@ -1,5 +1,4 @@ # Upcoming release -* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. # Release 0.6.2 * Removed support for Python 3.8 From c384f0c7dd431a3647ed98319a95f49644c68bee Mon Sep 17 00:00:00 2001 From: Richard Date: Thu, 13 Feb 2025 13:33:21 -0500 Subject: [PATCH 12/24] 998: Fixed case where MemoryDatasets in catalog wouldn't trigger `_is_memory_dataset` Signed-off-by: Richard Asselin --- kedro-airflow/kedro_airflow/grouping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kedro-airflow/kedro_airflow/grouping.py b/kedro-airflow/kedro_airflow/grouping.py index 3890804ae..31151e6d2 100644 --- a/kedro-airflow/kedro_airflow/grouping.py +++ b/kedro-airflow/kedro_airflow/grouping.py @@ -1,6 +1,6 @@ from __future__ import annotations -from kedro.io import DataCatalog +from kedro.io import DataCatalog, MemoryDataset from kedro.pipeline.node import Node from kedro.pipeline.pipeline import Pipeline @@ -11,9 +11,11 @@ def _is_memory_dataset(catalog, dataset_name: str) -> bool: + """Return whether a dataset is a MemoryDataset or not.""" if dataset_name not in catalog: return True - return False + else: + return isinstance(catalog.datasets[dataset_name], MemoryDataset) def get_memory_datasets( From b9005e6f02e41c79fbff8d8bbabb42c9256df330 Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 09:47:26 -0500 Subject: [PATCH 13/24] 998: Tests to ensure that MemoryDatasets are passed in mocked data catalog Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index 84f551545..e6b7faccb 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -3,7 +3,7 @@ from typing import Any import pytest -from kedro.io import AbstractDataset, DataCatalog +from kedro.io import AbstractDataset, DataCatalog, MemoryDataset from kedro.pipeline import Pipeline, node from 
kedro.pipeline.modular_pipeline import pipeline as modular_pipeline @@ -21,12 +21,15 @@ def _load(self): return [] -def mock_data_catalog(nodes: list[str], memory_nodes: set[str]) -> DataCatalog: +def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False) -> DataCatalog: mock_catalog = DataCatalog() for dataset_name in nodes: if dataset_name not in memory_nodes: dataset = TestDataset() mock_catalog.add(dataset_name, dataset) + elif memory_nodes_in_catalog: + mock_catalog.add(dataset_name, MemoryDataset()) + return mock_catalog @@ -143,8 +146,16 @@ def test_group_memory_nodes( ), ], ) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str]): - mock_catalog = mock_data_catalog(nodes, memory_nodes) +@pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) +def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): + """Tests for the `_is_memory_dataset` function. + + Args: + nodes: list of nodes to add to the catalog + memory_nodes: set of nodes which should be considered MemoryDatasets + memory_nodes_in_catalog: whether to add MemoryDatasets to the catalog or not + """ + mock_catalog = mock_data_catalog(nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog) for node_name in nodes: if node_name in memory_nodes: assert _is_memory_dataset(mock_catalog, node_name) From 9fe874af4877806b9e4879442c0d47462af0e089 Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 10:36:43 -0500 Subject: [PATCH 14/24] 998: Changelog Signed-off-by: Richard Asselin --- kedro-airflow/RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 6bd0b7163..72032b0e1 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,4 +1,5 @@ # Upcoming Release +* Fixed case where MemoryDatasets in catalog wouldn't be collapsed correctly # Release 0.9.2 * Removed support for Python 3.8 From 4dd6619bcb45d117529fbb7dbff353cc5edfa965 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 17 Feb 2025 06:57:32 -0500 Subject: [PATCH 15/24] 998: Linting fixes Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index e6b7faccb..aa0b3c5f0 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -21,7 +21,9 @@ def _load(self): return [] -def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False) -> DataCatalog: +def mock_data_catalog( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool = False +) -> DataCatalog: mock_catalog = DataCatalog() for dataset_name in nodes: if dataset_name not in memory_nodes: @@ -30,7 +32,6 @@ def mock_data_catalog(nodes: list[str], memory_nodes: set[str], memory_nodes_in_ elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) - return mock_catalog @@ -147,7 +148,9 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): +def test_is_memory_dataset( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool +): """Tests for the `_is_memory_dataset` function. 
Args: @@ -155,7 +158,9 @@ def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_node memory_nodes: set of nodes which should be considered MemoryDatasets memory_nodes_in_catalog: whether to add MemoryDatasets to the catalog or not """ - mock_catalog = mock_data_catalog(nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog) + mock_catalog = mock_data_catalog( + nodes, memory_nodes, memory_nodes_in_catalog=memory_nodes_in_catalog + ) for node_name in nodes: if node_name in memory_nodes: assert _is_memory_dataset(mock_catalog, node_name) From 15a0e3cf2c846c6658e8e541b44d9407439cdfb1 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 09:31:02 -0700 Subject: [PATCH 16/24] build(datasets): update list of extras for Ibis 10 (#1003) * build(datasets): update list of extras for Ibis 10 Signed-off-by: Deepyaman Datta * Update RELEASE.md Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index dd24582de..06a1fa468 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -4,6 +4,7 @@ - Added a parameter to enable/disable lazy saving for `PartitionedDataset`. - Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. +- Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. ## Bug fixes and other changes From 6060e67a0b105560cbefad5fcd7bd6f32f218394 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 10 Feb 2025 14:19:53 -0700 Subject: [PATCH 17/24] chore: remove internal devtools from release notes (#1004) * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta * chore: remove internal devtools from release notes Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-datasets/RELEASE.md | 1 - 1 file changed, 1 deletion(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 06a1fa468..dd24582de 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -4,7 +4,6 @@ - Added a parameter to enable/disable lazy saving for `PartitionedDataset`. - Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. -- Added `ibis-athena` and `ibis-databricks` extras for the backends added in Ibis 10.0. 
## Bug fixes and other changes From e49f25dde291ac475a2cd5c96dfe68ca9f8a4097 Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 09:47:26 -0500 Subject: [PATCH 18/24] 998: Tests to ensure that MemoryDatasets are passed in mocked data catalog Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index aa0b3c5f0..a2243fe7b 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -32,6 +32,7 @@ def mock_data_catalog( elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) + return mock_catalog @@ -148,9 +149,7 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset( - nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool -): +def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): """Tests for the `_is_memory_dataset` function. Args: From cffbaa27b4161899c5dc756a516a094c1cc3590e Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 17 Feb 2025 06:57:32 -0500 Subject: [PATCH 19/24] 998: Linting fixes Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index a2243fe7b..aa0b3c5f0 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -32,7 +32,6 @@ def mock_data_catalog( elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) - return mock_catalog @@ -149,7 +148,9 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): +def test_is_memory_dataset( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool +): """Tests for the `_is_memory_dataset` function. 
Args: From b07b8cb6e682db787818709b59cea3d9cbd0db1f Mon Sep 17 00:00:00 2001 From: Richard Date: Tue, 18 Feb 2025 07:17:52 -0500 Subject: [PATCH 20/24] 998: Changed function according to PR comments Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-airflow/kedro_airflow/grouping.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kedro-airflow/kedro_airflow/grouping.py b/kedro-airflow/kedro_airflow/grouping.py index 31151e6d2..a5bcd0ffd 100644 --- a/kedro-airflow/kedro_airflow/grouping.py +++ b/kedro-airflow/kedro_airflow/grouping.py @@ -12,10 +12,9 @@ def _is_memory_dataset(catalog, dataset_name: str) -> bool: """Return whether a dataset is a MemoryDataset or not.""" - if dataset_name not in catalog: - return True - else: - return isinstance(catalog.datasets[dataset_name], MemoryDataset) + return dataset_name not in catalog or isinstance( + catalog._get_dataset(dataset_name), MemoryDataset + ) def get_memory_datasets( From 2ec88b0716f10490e820a9143e1fa01252e3aace Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Fri, 14 Feb 2025 09:47:26 -0500 Subject: [PATCH 21/24] 998: Tests to ensure that MemoryDatasets are passed in mocked data catalog Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index aa0b3c5f0..a2243fe7b 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -32,6 +32,7 @@ def mock_data_catalog( elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) + return mock_catalog @@ -148,9 +149,7 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset( - nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool -): +def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): """Tests for the `_is_memory_dataset` function. Args: From 69aad578ee7da68c58d94c0749db94937d3f1715 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 17 Feb 2025 06:57:32 -0500 Subject: [PATCH 22/24] 998: Linting fixes Signed-off-by: Richard Asselin Signed-off-by: Richard Signed-off-by: Richard Asselin --- kedro-airflow/tests/test_node_grouping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kedro-airflow/tests/test_node_grouping.py b/kedro-airflow/tests/test_node_grouping.py index a2243fe7b..aa0b3c5f0 100644 --- a/kedro-airflow/tests/test_node_grouping.py +++ b/kedro-airflow/tests/test_node_grouping.py @@ -32,7 +32,6 @@ def mock_data_catalog( elif memory_nodes_in_catalog: mock_catalog.add(dataset_name, MemoryDataset()) - return mock_catalog @@ -149,7 +148,9 @@ def test_group_memory_nodes( ], ) @pytest.mark.parametrize("memory_nodes_in_catalog", (True, False)) -def test_is_memory_dataset(nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool): +def test_is_memory_dataset( + nodes: list[str], memory_nodes: set[str], memory_nodes_in_catalog: bool +): """Tests for the `_is_memory_dataset` function. 
Args: From fbb80814bc45eb15c4b5c7292e746587e62abb88 Mon Sep 17 00:00:00 2001 From: Richard Asselin Date: Tue, 18 Feb 2025 10:11:25 -0500 Subject: [PATCH 23/24] 998: Tweaked release Signed-off-by: Richard Asselin --- kedro-airflow/RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 72032b0e1..d5d2333df 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,5 +1,5 @@ # Upcoming Release -* Fixed case where MemoryDatasets in catalog wouldn't be collapsed correctly +* Fixed case where MemoryDatasets in catalog wouldn't be detected correctly # Release 0.9.2 * Removed support for Python 3.8 From 8eb9beba4a3b2b443c6e8dfe90de47861000da30 Mon Sep 17 00:00:00 2001 From: Richard Date: Fri, 21 Feb 2025 11:54:54 -0500 Subject: [PATCH 24/24] Update RELEASE.md Co-authored-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Signed-off-by: Richard --- kedro-airflow/RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index d5d2333df..6bee29ad2 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,5 +1,5 @@ # Upcoming Release -* Fixed case where MemoryDatasets in catalog wouldn't be detected correctly +* Fixed check whether a dataset is a `MemoryDataset`. # Release 0.9.2 * Removed support for Python 3.8
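
Usage sketch for the `save_lazily` flag added to `PartitionedDataset` in PATCH 07/24. This is a minimal, illustrative example (the paths, partition keys and dataframe contents are made up, not taken from the patches): with the default `save_lazily=True` a callable passed as partition data is invoked at save time, while `save_lazily=False` hands the callable itself to the underlying dataset.

import pandas as pd

from kedro_datasets.partitions import PartitionedDataset


def build_partition() -> pd.DataFrame:
    # Expensive computation, deferred until save time when lazy saving is on.
    return pd.DataFrame({"foo": [42], "bar": ["a"]})


# Default behaviour: the callable is materialised (called) before saving,
# so pandas.CSVDataset receives a DataFrame.
lazy_pds = PartitionedDataset(
    path="data/07_model_output/partitions",  # illustrative path
    dataset="pandas.CSVDataset",
    filename_suffix=".csv",
)
lazy_pds.save({"part_1": build_partition})

# With save_lazily=False the callable is saved as-is, e.g. pickled by
# pickle.PickleDataset, matching the new test_callable_save test.
eager_pds = PartitionedDataset(
    path="data/07_model_output/functions",  # illustrative path
    dataset="pickle.PickleDataset",
    save_lazily=False,
)
eager_pds.save({"part_1": build_partition})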
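
PATCH 08/24 switches `ibis.FileDataset` to pass `table_name` as a keyword argument when calling the backend's `read_*` method, for compatibility with Ibis 10.0. A rough sketch of the call the dataset now effectively makes, assuming a DuckDB backend and a parquet file (the connection, path and table name below are illustrative):

import ibis

con = ibis.duckdb.connect()  # illustrative backend; FileDataset builds this from its connection config
reader = getattr(con, "read_parquet")  # f"read_{file_format}" inside the dataset
table = reader("data/01_raw/example.parquet", table_name="example")  # table_name now passed as a keyword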
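
The kedro-airflow fix in PATCHES 12-20 changes `_is_memory_dataset` so that datasets explicitly registered as `MemoryDataset` are recognised, not only datasets missing from the catalog. A minimal sketch of the resulting behaviour (the dataset names are illustrative):

from kedro.io import DataCatalog, MemoryDataset

from kedro_airflow.grouping import _is_memory_dataset

catalog = DataCatalog({"in_memory": MemoryDataset()})

# Explicitly registered MemoryDataset: previously returned False, now True.
assert _is_memory_dataset(catalog, "in_memory")

# A dataset absent from the catalog is still treated as in-memory.
assert _is_memory_dataset(catalog, "model_input")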