Sort requirements.txt based on package name only #3436

Merged · 6 commits · Dec 18, 2023
Changes from all commits
17 changes: 10 additions & 7 deletions kedro/templates/project/hooks/post_gen_project.py
@@ -1,27 +1,30 @@
from pathlib import Path

from kedro.templates.project.hooks.utils import (

setup_template_tools,
sort_requirements,
)
from kedro.templates.project.hooks.utils import setup_template_tools, sort_requirements


def main():
current_dir = Path.cwd()
requirements_file_path = current_dir / "requirements.txt"
pyproject_file_path = current_dir / "pyproject.toml"
python_package_name = '{{ cookiecutter.python_package }}'
python_package_name = "{{ cookiecutter.python_package }}"

# Get the selected tools from cookiecutter
selected_tools = "{{ cookiecutter.tools }}"
example_pipeline = "{{ cookiecutter.example_pipeline }}"

# Handle template directories and requirements according to selected tools
setup_template_tools(selected_tools, requirements_file_path, pyproject_file_path, python_package_name, example_pipeline)
setup_template_tools(
selected_tools,
requirements_file_path,
pyproject_file_path,
python_package_name,
example_pipeline,
)

# Sort requirements.txt file in alphabetical order
sort_requirements(requirements_file_path)


if __name__ == "__main__":
main()
71 changes: 41 additions & 30 deletions kedro/templates/project/hooks/utils.py
@@ -2,15 +2,22 @@
import shutil
import toml

from pre_commit_hooks.requirements_txt_fixer import fix_requirements

current_dir = Path.cwd()

# Requirements for linting tools
lint_requirements = "black~=22.0\nruff~=0.0.290\n" # For requirements.txt
lint_pyproject_requirements = ["tool.ruff"] # For pyproject.toml

# Requirements and configurations for testing tools and coverage reporting
test_requirements = "pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2" # For requirements.txt
test_pyproject_requirements = ["tool.pytest.ini_options", "tool.coverage.report"] # For pyproject.toml
test_requirements = ( # For requirements.txt
"pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2"
)
test_pyproject_requirements = [ # For pyproject.toml
"tool.pytest.ini_options",
"tool.coverage.report",
]

# Configuration key for documentation dependencies
docs_pyproject_requirements = ["project.optional-dependencies"] # For pyproject.toml
@@ -27,16 +34,16 @@ def _remove_from_file(file_path: Path, content_to_remove: str) -> None:
file_path (Path): The path of the file from which to remove content.
content_to_remove (str): The content to be removed from the file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

# Split the content to remove into lines and remove trailing whitespaces/newlines
content_to_remove_lines = [line.strip() for line in content_to_remove.split('\n')]
content_to_remove_lines = [line.strip() for line in content_to_remove.split("\n")]

# Keep lines that are not in content_to_remove
lines = [line for line in lines if line.strip() not in content_to_remove_lines]

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


@@ -47,7 +54,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
data (dict): The dictionary from which to remove the section.
nested_key (str): The dotted path key representing the nested section to remove.
"""
keys = nested_key.split('.')
keys = nested_key.split(".")
current_data = data
# Look for Parent section
for key in keys[:-1]: # Iterate over all but last element
@@ -60,7 +67,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
current_data.pop(keys[-1], None) # Remove last element otherwise return None
for key in reversed(keys[:-1]):
parent_section = data
for key_part in keys[:keys.index(key)]:
for key_part in keys[: keys.index(key)]:
parent_section = parent_section[key_part]
if not current_data: # If the section is empty, remove it
parent_section.pop(key, None)
@@ -77,14 +84,14 @@ def _remove_from_toml(file_path: Path, sections_to_remove: list) -> None:
sections_to_remove (list): A list of section keys to remove from the TOML file.
"""
# Load the TOML file
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
data = toml.load(file)

# Remove the specified sections
for section in sections_to_remove:
_remove_nested_section(data, section)

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
toml.dump(data, file)


@@ -124,7 +131,7 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
# Empty the contents of conf/base/catalog.yml
catalog_yml_path = current_dir / "conf/base/catalog.yml"
if catalog_yml_path.exists():
catalog_yml_path.write_text('')
catalog_yml_path.write_text("")
# Remove parameter files from conf/base
conf_base_path = current_dir / "conf/base/"
parameter_file_patterns = ["parameters_*.yml", "parameters/*.yml"]
@@ -133,7 +140,9 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
_remove_file(param_file)

# Remove the pipelines subdirectories, if Viz - also "reporting" folder
pipelines_to_remove = ["data_science", "data_processing"] + (["reporting"] if is_viz else [])
pipelines_to_remove = ["data_science", "data_processing"] + (
["reporting"] if is_viz else []
)

pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in pipelines_to_remove:
@@ -150,23 +159,28 @@ def _remove_extras_from_kedro_datasets(file_path: Path) -> None:
Args:
file_path (Path): The path of the requirements file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

for i, line in enumerate(lines):
if 'kedro-datasets[' in line:
if "kedro-datasets[" in line:
# Split the line at '[', and keep the part before it
package = line.split('[', 1)[0]
package = line.split("[", 1)[0]
# Extract version
version = line.split(']')[-1]
version = line.split("]")[-1]
lines[i] = package + version

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


def setup_template_tools(selected_tools_list: str, requirements_file_path: Path, pyproject_file_path: Path,
python_package_name: str, example_pipeline: str) -> None:
def setup_template_tools(
selected_tools_list: str,
requirements_file_path: Path,
pyproject_file_path: Path,
python_package_name: str,
example_pipeline: str,
) -> None:
"""Set up the templates according to the choice of tools.

Args:
@@ -195,25 +209,22 @@ def setup_template_tools(selected_tools_list: str, requirements_file_path: Path,
if "Data Structure" not in selected_tools_list and example_pipeline != "True":
_remove_dir(current_dir / "data")

if ("PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list) and example_pipeline != "True":
_remove_pyspark_viz_starter_files("Kedro Viz" in selected_tools_list, python_package_name)
if (
"PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list
) and example_pipeline != "True":
_remove_pyspark_viz_starter_files(
"Kedro Viz" in selected_tools_list, python_package_name
)
# Remove requirements used by example pipelines
_remove_from_file(requirements_file_path, example_pipeline_requirements)
_remove_extras_from_kedro_datasets(requirements_file_path)


def sort_requirements(requirements_file_path: Path) -> None:
"""Sort the requirements.txt file alphabetically and write it back to the file.
"""Sort entries in `requirements.txt`, writing back changes, if any.

Args:
requirements_file_path (Path): The path to the `requirements.txt` file.
"""
with open(requirements_file_path, 'r') as requirements:
lines = requirements.readlines()

lines = [line.strip() for line in lines]
lines.sort()
sorted_content = '\n'.join(lines)

with open(requirements_file_path, 'w') as requirements:
requirements.write(sorted_content)
with open(requirements_file_path, "rb+") as file_obj:
fix_requirements(file_obj)
1 change: 1 addition & 0 deletions pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
"omegaconf>=2.1.1",
"parse>=1.19.0",
"pluggy>=1.0",
"pre-commit-hooks",
Review thread on this line:

Member:
Not extremely happy about adding a runtime dependency on pre-commit-hooks, though. At least it's unconstrained, but (1) it's unclear to me whether the project is meant to be used from its Python API (not that we're using a private function or anything, but I don't see references to it in the README), and (2) this dependency only exists for kedro new, and users are already complaining that we have too many dependencies; this seems to go in the opposite direction.
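
For context, the hook is normally consumed through pre-commit or its console script rather than through the Python API. A rough sketch of the CLI route, assuming the console script installed by pre-commit-hooks is named requirements-txt-fixer (an assumption worth double-checking), would be:

import subprocess

# Run the fixer as a command-line tool instead of importing fix_requirements.
# Like most pre-commit hooks, it is expected to exit non-zero when it rewrites the file.
result = subprocess.run(["requirements-txt-fixer", "requirements.txt"], check=False)
print("requirements.txt was rewritten" if result.returncode else "already sorted")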

Member Author:
@astrojuanlu If you feel strongly against it, I can just copy the implementation of that particular function. I added this because pre-commit-hooks is an extremely lightweight library (its only dependency is ruamel.yaml, which is itself very light).
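
If the function were vendored instead, a minimal sketch of a package-name-only sort could look like the following; this is a simplification (it hoists comments and blank lines to the top, unlike the upstream fixer) and the helper name is hypothetical:

import re
from pathlib import Path


def _sort_requirements_by_name(requirements_file_path: Path) -> None:
    """Sort requirement lines by package name, ignoring extras and version specifiers."""
    lines = requirements_file_path.read_text().splitlines()

    def package_name(line: str) -> str:
        # "kedro-datasets[pandas]>=1.0" -> "kedro-datasets"
        return re.split(r"[\[<>=~!; ]", line.strip(), maxsplit=1)[0].lower()

    comments = [line for line in lines if not line.strip() or line.lstrip().startswith("#")]
    requirements = [line for line in lines if line.strip() and not line.lstrip().startswith("#")]
    requirements.sort(key=package_name)

    requirements_file_path.write_text("\n".join(comments + requirements) + "\n")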

Member:
@merelcht what do you think?

Member:
I agree with @astrojuanlu: this sort of dependency isn't core to Kedro as a framework, so it doesn't belong as a runtime dependency. If anything, it should be something like a "dev" dependency.

Member Author:
@merelcht I don't think that's @astrojuanlu's point. I am using pre-commit-hooks as a runtime dependency here because the starter requirements get sorted when the project is generated.

The question is whether to vendor the requirements-sorting logic or keep this lightweight dependency.
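
For reference, keeping the dependency means the post-generation hook does roughly the following at kedro new time; as I understand it, fix_requirements expects a seekable binary file object and returns a pre-commit-style exit code (0 when the file was already sorted):

from pathlib import Path

from pre_commit_hooks.requirements_txt_fixer import fix_requirements

requirements_file_path = Path("requirements.txt")

# fix_requirements reads the file, sorts entries by package name,
# and writes the sorted content back in place if anything changed.
with open(requirements_file_path, "rb+") as file_obj:
    retval = fix_requirements(file_obj)

print("requirements.txt was rewritten" if retval else "already sorted")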

Member:
This is one of the blockers for Pyodide support #4278

"PyYAML>=4.2,<7.0",
"rich>=12.0,<14.0",
"rope>=0.21,<2.0", # subject to LGPLv3 license