Sort requirements.txt based on package name only #3436

Merged · 6 commits · Dec 18, 2023
Changes from all commits
17 changes: 10 additions & 7 deletions kedro/templates/project/hooks/post_gen_project.py
@@ -1,27 +1,30 @@
from pathlib import Path

from kedro.templates.project.hooks.utils import (

setup_template_tools,
sort_requirements,
)
from kedro.templates.project.hooks.utils import setup_template_tools, sort_requirements


def main():
current_dir = Path.cwd()
requirements_file_path = current_dir / "requirements.txt"
pyproject_file_path = current_dir / "pyproject.toml"
python_package_name = '{{ cookiecutter.python_package }}'
python_package_name = "{{ cookiecutter.python_package }}"

# Get the selected tools from cookiecutter
selected_tools = "{{ cookiecutter.tools }}"
example_pipeline = "{{ cookiecutter.example_pipeline }}"

# Handle template directories and requirements according to selected tools
setup_template_tools(selected_tools, requirements_file_path, pyproject_file_path, python_package_name, example_pipeline)
setup_template_tools(
selected_tools,
requirements_file_path,
pyproject_file_path,
python_package_name,
example_pipeline,
)

# Sort requirements.txt file in alphabetical order
sort_requirements(requirements_file_path)


if __name__ == "__main__":
main()
71 changes: 41 additions & 30 deletions kedro/templates/project/hooks/utils.py
@@ -2,15 +2,22 @@
import shutil
import toml

from pre_commit_hooks.requirements_txt_fixer import fix_requirements

current_dir = Path.cwd()

# Requirements for linting tools
lint_requirements = "black~=22.0\nruff~=0.0.290\n" # For requirements.txt
lint_pyproject_requirements = ["tool.ruff"] # For pyproject.toml

# Requirements and configurations for testing tools and coverage reporting
test_requirements = "pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2" # For requirements.txt
test_pyproject_requirements = ["tool.pytest.ini_options", "tool.coverage.report"] # For pyproject.toml
test_requirements = ( # For requirements.txt
"pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2"
)
test_pyproject_requirements = [ # For pyproject.toml
"tool.pytest.ini_options",
"tool.coverage.report",
]

# Configuration key for documentation dependencies
docs_pyproject_requirements = ["project.optional-dependencies"] # For pyproject.toml
@@ -27,16 +34,16 @@ def _remove_from_file(file_path: Path, content_to_remove: str) -> None:
file_path (Path): The path of the file from which to remove content.
content_to_remove (str): The content to be removed from the file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

# Split the content to remove into lines and remove trailing whitespaces/newlines
content_to_remove_lines = [line.strip() for line in content_to_remove.split('\n')]
content_to_remove_lines = [line.strip() for line in content_to_remove.split("\n")]

# Keep lines that are not in content_to_remove
lines = [line for line in lines if line.strip() not in content_to_remove_lines]

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


@@ -47,7 +54,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
data (dict): The dictionary from which to remove the section.
nested_key (str): The dotted path key representing the nested section to remove.
"""
keys = nested_key.split('.')
keys = nested_key.split(".")
current_data = data
# Look for Parent section
for key in keys[:-1]: # Iterate over all but last element
@@ -60,7 +67,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
current_data.pop(keys[-1], None) # Remove last element otherwise return None
for key in reversed(keys[:-1]):
parent_section = data
for key_part in keys[:keys.index(key)]:
for key_part in keys[: keys.index(key)]:
parent_section = parent_section[key_part]
if not current_data: # If the section is empty, remove it
parent_section.pop(key, None)
@@ -77,14 +84,14 @@ def _remove_from_toml(file_path: Path, sections_to_remove: list) -> None:
sections_to_remove (list): A list of section keys to remove from the TOML file.
"""
# Load the TOML file
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
data = toml.load(file)

# Remove the specified sections
for section in sections_to_remove:
_remove_nested_section(data, section)

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
toml.dump(data, file)


@@ -124,7 +131,7 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
# Empty the contents of conf/base/catalog.yml
catalog_yml_path = current_dir / "conf/base/catalog.yml"
if catalog_yml_path.exists():
catalog_yml_path.write_text('')
catalog_yml_path.write_text("")
# Remove parameter files from conf/base
conf_base_path = current_dir / "conf/base/"
parameter_file_patterns = ["parameters_*.yml", "parameters/*.yml"]
@@ -133,7 +140,9 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
_remove_file(param_file)

# Remove the pipelines subdirectories, if Viz - also "reporting" folder
pipelines_to_remove = ["data_science", "data_processing"] + (["reporting"] if is_viz else [])
pipelines_to_remove = ["data_science", "data_processing"] + (
["reporting"] if is_viz else []
)

pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in pipelines_to_remove:
@@ -150,23 +159,28 @@ def _remove_extras_from_kedro_datasets(file_path: Path) -> None:
Args:
file_path (Path): The path of the requirements file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

for i, line in enumerate(lines):
if 'kedro-datasets[' in line:
if "kedro-datasets[" in line:
# Split the line at '[', and keep the part before it
package = line.split('[', 1)[0]
package = line.split("[", 1)[0]
# Extract version
version = line.split(']')[-1]
version = line.split("]")[-1]
lines[i] = package + version

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


def setup_template_tools(selected_tools_list: str, requirements_file_path: Path, pyproject_file_path: Path,
python_package_name: str, example_pipeline: str) -> None:
def setup_template_tools(
selected_tools_list: str,
requirements_file_path: Path,
pyproject_file_path: Path,
python_package_name: str,
example_pipeline: str,
) -> None:
"""Set up the templates according to the choice of tools.

Args:
@@ -195,25 +209,22 @@ def setup_template_tools(selected_tools_list: str, requirements_file_path: Path,
if "Data Structure" not in selected_tools_list and example_pipeline != "True":
_remove_dir(current_dir / "data")

if ("PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list) and example_pipeline != "True":
_remove_pyspark_viz_starter_files("Kedro Viz" in selected_tools_list, python_package_name)
if (
"PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list
) and example_pipeline != "True":
_remove_pyspark_viz_starter_files(
"Kedro Viz" in selected_tools_list, python_package_name
)
# Remove requirements used by example pipelines
_remove_from_file(requirements_file_path, example_pipeline_requirements)
_remove_extras_from_kedro_datasets(requirements_file_path)


def sort_requirements(requirements_file_path: Path) -> None:
"""Sort the requirements.txt file alphabetically and write it back to the file.
"""Sort entries in `requirements.txt`, writing back changes, if any.

Args:
requirements_file_path (Path): The path to the `requirements.txt` file.
"""
with open(requirements_file_path, 'r') as requirements:
lines = requirements.readlines()

lines = [line.strip() for line in lines]
lines.sort()
sorted_content = '\n'.join(lines)

with open(requirements_file_path, 'w') as requirements:
requirements.write(sorted_content)
with open(requirements_file_path, "rb+") as file_obj:
fix_requirements(file_obj)
1 change: 1 addition & 0 deletions pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
"omegaconf>=2.1.1",
"parse>=1.19.0",
"pluggy>=1.0",
"pre-commit-hooks",
Review thread on this line:

Member:
Not extremely happy about adding a runtime dependency on pre-commit-hooks, though. At least it's unconstrained, but (1) it's unclear to me whether the project is meant to be used from its Python API (not that we're using a private function or anything, but I don't see references to it in the README), and (2) this dependency only exists for kedro new, and users are already complaining that we have too many dependencies; this seems to go in the opposite direction.
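
For context, the hook is normally consumed through pre-commit or its console script rather than through the Python API. A rough sketch of the CLI route, assuming the console script installed by pre-commit-hooks is named requirements-txt-fixer (an assumption worth double-checking), would be:

import subprocess

# Run the fixer as a command-line tool instead of importing fix_requirements.
# Like most pre-commit hooks, it is expected to exit non-zero when it rewrites the file.
result = subprocess.run(["requirements-txt-fixer", "requirements.txt"], check=False)
print("requirements.txt was rewritten" if result.returncode else "already sorted")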

Member Author:
@astrojuanlu If you feel strongly against it, I can just copy the implementation of that particular function. I added this because pre-commit-hooks is an extremely lightweight library (its only dependency is ruamel.yaml, which is itself very light).
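
If the function were vendored instead, a minimal sketch of a package-name-only sort could look like the following; this is a simplification (it hoists comments and blank lines to the top, unlike the upstream fixer) and the helper name is hypothetical:

import re
from pathlib import Path


def _sort_requirements_by_name(requirements_file_path: Path) -> None:
    """Sort requirement lines by package name, ignoring extras and version specifiers."""
    lines = requirements_file_path.read_text().splitlines()

    def package_name(line: str) -> str:
        # "kedro-datasets[pandas]>=1.0" -> "kedro-datasets"
        return re.split(r"[\[<>=~!; ]", line.strip(), maxsplit=1)[0].lower()

    comments = [line for line in lines if not line.strip() or line.lstrip().startswith("#")]
    requirements = [line for line in lines if line.strip() and not line.lstrip().startswith("#")]
    requirements.sort(key=package_name)

    requirements_file_path.write_text("\n".join(comments + requirements) + "\n")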

Member:
@merelcht what do you think?

Member:
I agree with @astrojuanlu: this sort of dependency isn't core to Kedro as a framework, so it doesn't belong as a runtime dependency. If anything, it should be something like a "dev" dependency.

Member Author:
@merelcht I don't think that's @astrojuanlu's point. I am using pre-commit-hooks as a runtime dependency here because the starter requirements get sorted when the project is generated.

The question is whether to vendor the requirements-sorting logic or keep this lightweight dependency.
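
For reference, keeping the dependency means the post-generation hook does roughly the following at kedro new time; as I understand it, fix_requirements expects a seekable binary file object and returns a pre-commit-style exit code (0 when the file was already sorted):

from pathlib import Path

from pre_commit_hooks.requirements_txt_fixer import fix_requirements

requirements_file_path = Path("requirements.txt")

# fix_requirements reads the file, sorts entries by package name,
# and writes the sorted content back in place if anything changed.
with open(requirements_file_path, "rb+") as file_obj:
    retval = fix_requirements(file_obj)

print("requirements.txt was rewritten" if retval else "already sorted")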

Member:
This is one of the blockers for Pyodide support #4278

"PyYAML>=4.2,<7.0",
"rich>=12.0,<14.0",
"rope>=0.21,<2.0", # subject to LGPLv3 license