From 3948e592a7e2e7bdd133f7863b3d6ac925165873 Mon Sep 17 00:00:00 2001 From: Haitham-AbdelKarim Date: Tue, 12 Mar 2024 16:07:42 +0200 Subject: [PATCH] Refactor markdown removal functions to accept markdown as input string --- project_explainer_ui/ui.py | 2 +- project_processor/gh_processor/file_utils.py | 66 +++----------------- 2 files changed, 10 insertions(+), 58 deletions(-) diff --git a/project_explainer_ui/ui.py b/project_explainer_ui/ui.py index a836666..03bfd95 100644 --- a/project_explainer_ui/ui.py +++ b/project_explainer_ui/ui.py @@ -5,7 +5,7 @@ def summarize(summarization_type, github_project_url, github_project_branch="mai gptExplainer = Explainer(huggingface_model_id) if summarization_type == "brief": return gptExplainer.brief(github_url=github_project_url, branch=github_project_branch)["summary"] - return gptExplainer.outline(github_url=github_project_url, branch=github_project_branch)["summary"] + return gptExplainer.outline(github_url=github_project_url, branch=github_project_branch) demo = gr.Interface( fn=summarize, diff --git a/project_processor/gh_processor/file_utils.py b/project_processor/gh_processor/file_utils.py index 77480c4..27c2710 100644 --- a/project_processor/gh_processor/file_utils.py +++ b/project_processor/gh_processor/file_utils.py @@ -398,120 +398,72 @@ def get_elements_from_markdown_file(file_path: str, elements: List[str]) -> Dict return result -def remove_images_from_markdown(file_path: str) -> str: +def remove_images_from_markdown(markdown_content: str) -> str: """ Removes image tags from a Markdown file and returns the updated content without images. Args: - file_path: The path to the Markdown file. + markdown_content: The Markdown content that will be processed. Returns: The Markdown content without images. - Raises: - ValueError: If the provided file is not a Markdown file or if the file does not exist. """ - - if not file_path.lower().endswith('.md'): - raise ValueError( - "Invalid file. Only Markdown files (.md) are supported.") - - if not os.path.isfile(file_path): - raise ValueError("File not found.") - - with open(file_path, 'r') as f: - markdown_content = f.read() - + markdown_content_without_images = re.sub( '!\[.*?\]\(.*?\)', '', markdown_content) return markdown_content_without_images -def remove_links_from_markdown(file_path: str) -> str: +def remove_links_from_markdown(markdown_content: str) -> str: """ Removes link tags from a Markdown file and returns the updated content. Args: - file_path: The path to the Markdown file. + markdown_content: The Markdown content that will be processed. Returns: The Markdown content without links. - Raises: - ValueError: If the provided file is not a Markdown file or if the file does not exist. """ - if not file_path.lower().endswith('.md'): - raise ValueError( - "Invalid file. Only Markdown files (.md) are supported.") - - if not os.path.isfile(file_path): - raise ValueError("File not found.") - - with open(file_path, 'r') as f: - markdown_content = f.read() - markdown_content_without_links = re.sub( '\[.*?\]\(.*?\)', '', markdown_content) return markdown_content_without_links -def remove_code_blocks_from_markdown(file_path: str) -> str: +def remove_code_blocks_from_markdown(markdown_content: str) -> str: """ Removes code blocks from a Markdown file and returns the updated content. Args: - file_path: The path to the Markdown file. + markdown_content: The Markdown content that will be processed. Returns: The Markdown content without code blocks. - Raises: - ValueError: If the provided file is not a Markdown file or if the file does not exist. """ - if not file_path.lower().endswith('.md'): - raise ValueError( - "Invalid file. Only Markdown files (.md) are supported.") - - if not os.path.isfile(file_path): - raise ValueError("File not found.") - - with open(file_path, 'r') as f: - markdown_content = f.read() - markdown_content_without_code_blocks = re.sub( '```[\s\S]*?```', '', markdown_content) return markdown_content_without_code_blocks -def remove_tables_from_markdown(file_path: str) -> str: +def remove_tables_from_markdown(markdown_content: str) -> str: """ Removes tables from a Markdown file and returns the updated content. Args: - file_path: The path to the Markdown file. + markdown_content: The Markdown content that will be processed. Returns: The Markdown content without tables. - Raises: - ValueError: If the provided file is not a Markdown file or if the file does not exist. """ - if not file_path.lower().endswith('.md'): - raise ValueError( - "Invalid file. Only Markdown files (.md) are supported.") - - if not os.path.isfile(file_path): - raise ValueError("File not found.") - - with open(file_path, 'r') as f: - markdown_content = f.read() - markdown_content_without_tables = re.sub( r'\n\|.*\|\n\|.*\|\n(\|.*\|)+', '', markdown_content)