Skip to content

Commit

Permalink
Sort requirements.txt based on package name only (#3436)
Browse files Browse the repository at this point in the history
* Sort `requirements.txt` based on package name only

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Remove now-unused custom `requirements.txt` sorter

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Ruff format kedro/templates/project/hooks/utils.py

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Pass the right argument to `fix_requirements` call

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Add `sort_requirements` until starters are updated

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

* Wrap lib call in existing method for compatibility

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

---------

Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
  • Loading branch information
deepyaman authored Dec 18, 2023
1 parent 51b47bf commit 52d5189
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 37 deletions.
17 changes: 10 additions & 7 deletions kedro/templates/project/hooks/post_gen_project.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
from pathlib import Path

from kedro.templates.project.hooks.utils import (

setup_template_tools,
sort_requirements,
)
from kedro.templates.project.hooks.utils import setup_template_tools, sort_requirements


def main():
current_dir = Path.cwd()
requirements_file_path = current_dir / "requirements.txt"
pyproject_file_path = current_dir / "pyproject.toml"
python_package_name = '{{ cookiecutter.python_package }}'
python_package_name = "{{ cookiecutter.python_package }}"

# Get the selected tools from cookiecutter
selected_tools = "{{ cookiecutter.tools }}"
example_pipeline = "{{ cookiecutter.example_pipeline }}"

# Handle template directories and requirements according to selected tools
setup_template_tools(selected_tools, requirements_file_path, pyproject_file_path, python_package_name, example_pipeline)
setup_template_tools(
selected_tools,
requirements_file_path,
pyproject_file_path,
python_package_name,
example_pipeline,
)

# Sort requirements.txt file in alphabetical order
sort_requirements(requirements_file_path)


if __name__ == "__main__":
main()
71 changes: 41 additions & 30 deletions kedro/templates/project/hooks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,22 @@
import shutil
import toml

from pre_commit_hooks.requirements_txt_fixer import fix_requirements

current_dir = Path.cwd()

# Requirements for linting tools
lint_requirements = "black~=22.0\nruff~=0.0.290\n" # For requirements.txt
lint_pyproject_requirements = ["tool.ruff"] # For pyproject.toml

# Requirements and configurations for testing tools and coverage reporting
test_requirements = "pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2" # For requirements.txt
test_pyproject_requirements = ["tool.pytest.ini_options", "tool.coverage.report"] # For pyproject.toml
test_requirements = ( # For requirements.txt
"pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2"
)
test_pyproject_requirements = [ # For pyproject.toml
"tool.pytest.ini_options",
"tool.coverage.report",
]

# Configuration key for documentation dependencies
docs_pyproject_requirements = ["project.optional-dependencies"] # For pyproject.toml
Expand All @@ -27,16 +34,16 @@ def _remove_from_file(file_path: Path, content_to_remove: str) -> None:
file_path (Path): The path of the file from which to remove content.
content_to_remove (str): The content to be removed from the file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

# Split the content to remove into lines and remove trailing whitespaces/newlines
content_to_remove_lines = [line.strip() for line in content_to_remove.split('\n')]
content_to_remove_lines = [line.strip() for line in content_to_remove.split("\n")]

# Keep lines that are not in content_to_remove
lines = [line for line in lines if line.strip() not in content_to_remove_lines]

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


Expand All @@ -47,7 +54,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
data (dict): The dictionary from which to remove the section.
nested_key (str): The dotted path key representing the nested section to remove.
"""
keys = nested_key.split('.')
keys = nested_key.split(".")
current_data = data
# Look for Parent section
for key in keys[:-1]: # Iterate over all but last element
Expand All @@ -60,7 +67,7 @@ def _remove_nested_section(data: dict, nested_key: str) -> None:
current_data.pop(keys[-1], None) # Remove last element otherwise return None
for key in reversed(keys[:-1]):
parent_section = data
for key_part in keys[:keys.index(key)]:
for key_part in keys[: keys.index(key)]:
parent_section = parent_section[key_part]
if not current_data: # If the section is empty, remove it
parent_section.pop(key, None)
Expand All @@ -77,14 +84,14 @@ def _remove_from_toml(file_path: Path, sections_to_remove: list) -> None:
sections_to_remove (list): A list of section keys to remove from the TOML file.
"""
# Load the TOML file
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
data = toml.load(file)

# Remove the specified sections
for section in sections_to_remove:
_remove_nested_section(data, section)

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
toml.dump(data, file)


Expand Down Expand Up @@ -124,7 +131,7 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
# Empty the contents of conf/base/catalog.yml
catalog_yml_path = current_dir / "conf/base/catalog.yml"
if catalog_yml_path.exists():
catalog_yml_path.write_text('')
catalog_yml_path.write_text("")
# Remove parameter files from conf/base
conf_base_path = current_dir / "conf/base/"
parameter_file_patterns = ["parameters_*.yml", "parameters/*.yml"]
Expand All @@ -133,7 +140,9 @@ def _remove_pyspark_viz_starter_files(is_viz: bool, python_package_name: str) ->
_remove_file(param_file)

# Remove the pipelines subdirectories, if Viz - also "reporting" folder
pipelines_to_remove = ["data_science", "data_processing"] + (["reporting"] if is_viz else [])
pipelines_to_remove = ["data_science", "data_processing"] + (
["reporting"] if is_viz else []
)

pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in pipelines_to_remove:
Expand All @@ -150,23 +159,28 @@ def _remove_extras_from_kedro_datasets(file_path: Path) -> None:
Args:
file_path (Path): The path of the requirements file.
"""
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
lines = file.readlines()

for i, line in enumerate(lines):
if 'kedro-datasets[' in line:
if "kedro-datasets[" in line:
# Split the line at '[', and keep the part before it
package = line.split('[', 1)[0]
package = line.split("[", 1)[0]
# Extract version
version = line.split(']')[-1]
version = line.split("]")[-1]
lines[i] = package + version

with open(file_path, 'w') as file:
with open(file_path, "w") as file:
file.writelines(lines)


def setup_template_tools(selected_tools_list: str, requirements_file_path: Path, pyproject_file_path: Path,
python_package_name: str, example_pipeline: str) -> None:
def setup_template_tools(
selected_tools_list: str,
requirements_file_path: Path,
pyproject_file_path: Path,
python_package_name: str,
example_pipeline: str,
) -> None:
"""Set up the templates according to the choice of tools.
Args:
Expand Down Expand Up @@ -195,25 +209,22 @@ def setup_template_tools(selected_tools_list: str, requirements_file_path: Path,
if "Data Structure" not in selected_tools_list and example_pipeline != "True":
_remove_dir(current_dir / "data")

if ("PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list) and example_pipeline != "True":
_remove_pyspark_viz_starter_files("Kedro Viz" in selected_tools_list, python_package_name)
if (
"PySpark" in selected_tools_list or "Kedro Viz" in selected_tools_list
) and example_pipeline != "True":
_remove_pyspark_viz_starter_files(
"Kedro Viz" in selected_tools_list, python_package_name
)
# Remove requirements used by example pipelines
_remove_from_file(requirements_file_path, example_pipeline_requirements)
_remove_extras_from_kedro_datasets(requirements_file_path)


def sort_requirements(requirements_file_path: Path) -> None:
"""Sort the requirements.txt file alphabetically and write it back to the file.
"""Sort entries in `requirements.txt`, writing back changes, if any.
Args:
requirements_file_path (Path): The path to the `requirements.txt` file.
"""
with open(requirements_file_path, 'r') as requirements:
lines = requirements.readlines()

lines = [line.strip() for line in lines]
lines.sort()
sorted_content = '\n'.join(lines)

with open(requirements_file_path, 'w') as requirements:
requirements.write(sorted_content)
with open(requirements_file_path, "rb+") as file_obj:
fix_requirements(file_obj)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dependencies = [
"omegaconf>=2.1.1",
"parse>=1.19.0",
"pluggy>=1.0",
"pre-commit-hooks",

This comment has been minimized.

Copy link
@sbrugman

sbrugman Dec 18, 2023

Contributor

Shouldn't this be a dev dependency?

This comment has been minimized.

Copy link
@astrojuanlu

astrojuanlu Dec 18, 2023

Member

See discussion in #3436 (comment)

"PyYAML>=4.2,<7.0",
"rich>=12.0,<14.0",
"rope>=0.21,<2.0", # subject to LGPLv3 license
Expand Down

0 comments on commit 52d5189

Please sign in to comment.