-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extract the connector changelog modification out of the bump_version code #34586
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import datetime | ||
import re | ||
from dataclasses import dataclass | ||
from operator import attrgetter | ||
from typing import Set, Tuple | ||
|
||
import semver | ||
from pipelines.helpers.github import AIRBYTE_GITHUB_REPO | ||
|
||
|
||
class ChangelogParsingException(Exception):
    """Error raised when a documentation file's changelog table cannot be parsed."""
|
||
|
||
@dataclass(frozen=True)
class ChangelogEntry:
    """A single row of a connector changelog table.

    This is a frozen dataclass, i.e. an immutable value object. ``__eq__`` and
    ``__hash__`` are generated by ``dataclass`` from the four fields below, so two
    entries are equal exactly when all their fields are equal, and entries can be
    stored in sets.
    """

    # Date shown in the "Date" column.
    date: datetime.date
    # Connector version shown in the "Version" column.
    version: semver.Version
    # Pull request number; rendered as a link to the pull request.
    pr_number: int
    # Free text shown in the "Subject" column.
    comment: str

    def to_markdown(self, github_repo: str = AIRBYTE_GITHUB_REPO) -> str:
        """Render this entry as one markdown table row.

        Args:
            github_repo: Repository path inserted into the pull request URL
                (``https://github.com/<github_repo>/pull/<pr_number>``).

        Returns:
            The row ``| version | YYYY-MM-DD | [pr](url) | comment |``.
        """
        return f'| {self.version} | {self.date.strftime("%Y-%m-%d")} | [{self.pr_number}](https://github.com/{github_repo}/pull/{self.pr_number}) | {self.comment} |'

    def __str__(self) -> str:
        """Return a compact ``key=value`` description of the entry."""
        return f'version={self.version}, date={self.date.strftime("%Y-%m-%d")}, pr_number={self.pr_number}, comment={self.comment}'

    def __repr__(self) -> str:
        """Return the ``__str__`` description prefixed with the class name."""
        return "ChangelogEntry: " + self.__str__()
|
||
|
||
def parse_markdown(markdown_lines: list[str], github_repo: str) -> Tuple[int, Set[ChangelogEntry]]:
    """Find the changelog table in a documentation file and parse its leading entry rows.

    Args:
        markdown_lines: The documentation file split into lines (e.g. with ``str.splitlines()``).
        github_repo: Repository path (``owner/name``) that the pull request links of the rows must point to.

    Returns:
        A tuple ``(start_index, entries)``. ``start_index`` is the index of the first line after the table
        header and its delimiter row. ``entries`` contains every row parsed from ``start_index`` onwards,
        stopping at the first line that is not a well-formed entry (or whose two pull request numbers differ).

    Raises:
        ChangelogParsingException: If the header row is not found, if the line before the header is not
            empty, or if the header is not followed by a delimiter row.
    """
    # The repository name is user data, not a pattern: escape it (and the dot of github.com).
    entry_pattern = re.compile(
        r"^\| *(?P<version>[0-9]+\.[0-9]+\.[0-9]+?) *\| *"
        r"(?P<day>[0-9]{4}-[0-9]{2}-[0-9]{2}) *\| *"
        r"\[?(?P<pr_number1>[0-9]+)\]? ?\(https://github\.com/"
        + re.escape(github_repo)
        + r"/pull/(?P<pr_number2>[0-9]+)\) *\| *"
        r"(?P<comment>[^ ].*[^ ]) *\| *$"
    )
    header_pattern = re.compile(r"\| *Version *\| *Date *\| *Pull Request *\| *Subject *\|")

    changelog_header_line_index = next(
        (line_index for line_index, line in enumerate(markdown_lines) if header_pattern.search(line)),
        -1,
    )
    if changelog_header_line_index == -1:
        raise ChangelogParsingException("Could not find the changelog section table in the documentation file.")

    # Only look at the previous line when there is one: index -1 would silently wrap around to the last line.
    if changelog_header_line_index > 0 and markdown_lines[changelog_header_line_index - 1] != "":
        raise ChangelogParsingException(
            "Found changelog section table in the documentation file at line but there is not blank line before it."
        )

    # A header on the very last line has no delimiter row either; report it as a parsing error
    # instead of leaking a StopIteration/IndexError.
    delimiter_line_index = changelog_header_line_index + 1
    if delimiter_line_index >= len(markdown_lines) or not re.search(r"(\|[- :]*){4}\|", markdown_lines[delimiter_line_index]):
        raise ChangelogParsingException("The changelog table in the documentation file is missing the header delimiter.")
    changelog_entries_start_line_index = changelog_header_line_index + 2

    # Consume rows until the first line that is not a valid changelog entry.
    entries: Set[ChangelogEntry] = set()
    for line in markdown_lines[changelog_entries_start_line_index:]:
        entry_match = entry_pattern.search(line)
        if not entry_match or entry_match.group("pr_number1") != entry_match.group("pr_number2"):
            break
        entries.add(
            ChangelogEntry(
                datetime.date.fromisoformat(entry_match.group("day")),
                semver.Version.parse(entry_match.group("version")),
                int(entry_match.group("pr_number1")),
                entry_match.group("comment"),
            )
        )

    return changelog_entries_start_line_index, entries
|
||
|
||
class Changelog:
    """In-memory view of a documentation file's changelog table, to which entries can be added."""

    def __init__(self, markdown: str, github_repo: str = AIRBYTE_GITHUB_REPO) -> None:
        """Parse the changelog table out of ``markdown``.

        Args:
            markdown: Full text of the documentation file.
            github_repo: Repository path used to recognise and render pull request links.

        Raises:
            ChangelogParsingException: Propagated from ``parse_markdown`` when the table cannot be parsed.
        """
        self.original_markdown_lines = markdown.splitlines()
        self.changelog_entries_start_line_index, self.original_entries = parse_markdown(self.original_markdown_lines, github_repo)
        self.new_entries: Set[ChangelogEntry] = set()
        self.github_repo = github_repo

    def add_entry(self, version: semver.Version, date: datetime.date, pull_request_number: int, comment: str) -> None:
        """Register a new entry; an entry equal to one already added is stored only once."""
        self.new_entries.add(ChangelogEntry(date, version, pull_request_number, comment))

    def to_markdown(self) -> str:
        """Return the documentation file with the changelog rows replaced by all entries, newest first.

        Original and new entries are merged (duplicates collapse), then sorted in descending order by
        version, date, pull request number and comment. The result always ends with a newline.
        """
        all_entries = self.original_entries | self.new_entries
        # One multi-key sort is equivalent to the chain of stable single-key sorts (least significant key first).
        sorted_entries = sorted(all_entries, key=attrgetter("version", "date", "pr_number", "comment"), reverse=True)

        entries_start = self.changelog_entries_start_line_index
        # NOTE(review): original_entries is a set, so if the source table contains identical rows this
        # count is smaller than the number of lines parsed and the extra rows are kept after the table.
        entries_end = entries_start + len(self.original_entries)
        new_lines = (
            self.original_markdown_lines[:entries_start]
            + [entry.to_markdown(self.github_repo) for entry in sorted_entries]
            + self.original_markdown_lines[entries_end:]
        )
        return "\n".join(new_lines) + "\n"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import datetime | ||
import difflib | ||
from pathlib import Path | ||
|
||
import pytest | ||
import semver | ||
from pipelines.helpers.changelog import Changelog, ChangelogParsingException | ||
|
||
# Marks applied to every test in this module.
pytestmark = [pytest.mark.anyio]
|
||
# Fixture directories, expressed relative to the current working directory.
_TEST_DATA_ROOT = Path("airbyte-ci/connectors/pipelines/tests/test_changelog")
PATH_TO_INITIAL_FILES = _TEST_DATA_ROOT / "initial_files"
PATH_TO_RESULT_FILES = _TEST_DATA_ROOT / "result_files"
# When True, the helpers below overwrite the expected-result files with the actual output.
WRITE_TO_RESULT_FILE = False
|
||
|
||
def check_result(changelog: Changelog, result_filename: str) -> None:
    """Assert that the rendered changelog matches the stored expected file.

    Args:
        changelog: Changelog whose ``to_markdown()`` output is checked.
        result_filename: Name of the expected-output file inside ``PATH_TO_RESULT_FILES``.
            A missing file is treated as empty expected text.

    When ``WRITE_TO_RESULT_FILE`` is set, the actual output is written to the result file
    before the assertion, so the stored expectation is refreshed even if the check fails.
    """
    markdown = changelog.to_markdown()
    result_filepath = PATH_TO_RESULT_FILES / result_filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "".join(difflib.unified_diff(expected_text.splitlines(keepends=True), markdown.splitlines(keepends=True)))
    if WRITE_TO_RESULT_FILE:
        # write_text opens and closes the file itself, so no handle is leaked on error.
        result_filepath.write_text(markdown)
    assert diff == ""
|
||
|
||
def get_changelog(filename: str) -> Changelog:
    """Build a ``Changelog`` from the fixture ``filename`` located in ``PATH_TO_INITIAL_FILES``."""
    filepath = PATH_TO_INITIAL_FILES / filename
    # read_text closes the file; the previous open(...).read() left the handle to the garbage collector.
    return Changelog(filepath.read_text())
|
||
|
||
@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_single_insert(dagger_client, filename):
    """Add one entry and compare the output with the ``single_insert_<filename>`` result file."""
    new_version = semver.VersionInfo.parse("3.4.0")
    new_date = datetime.date.fromisoformat("2024-03-01")
    changelog = get_changelog(filename)
    changelog.add_entry(new_version, new_date, 123456, "test")
    check_result(changelog, f"single_insert_{filename}")
|
||
|
||
@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_versions(dagger_client, filename):
    """Add two entries sharing a version but differing in date, pull request and comment."""
    shared_version = "3.4.0"
    additions = [
        ("2024-03-01", 123456, "test1"),
        ("2024-03-02", 123457, "test2"),
    ]
    changelog = get_changelog(filename)
    for day, pr_number, comment in additions:
        changelog.add_entry(semver.VersionInfo.parse(shared_version), datetime.date.fromisoformat(day), pr_number, comment)
    check_result(changelog, f"dupicate_versions_{filename}")
|
||
|
||
@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_version_date(dagger_client, filename):
    """Add two entries sharing version and date but differing in pull request and comment."""
    shared_version = "3.4.0"
    shared_day = "2024-03-01"
    changelog = get_changelog(filename)
    for pr_number, comment in [(123456, "test1"), (123457, "test2")]:
        changelog.add_entry(
            semver.VersionInfo.parse(shared_version),
            datetime.date.fromisoformat(shared_day),
            pr_number,
            comment,
        )
    check_result(changelog, f"dupicate_version_date_{filename}")
|
||
|
||
@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_entries(dagger_client, filename):
    """Add the very same entry twice and compare with the ``duplicate_entry_<filename>`` result file."""
    changelog = get_changelog(filename)
    for _ in range(2):
        changelog.add_entry(
            semver.VersionInfo.parse("3.4.0"),
            datetime.date.fromisoformat("2024-03-01"),
            123456,
            "test",
        )
    check_result(changelog, f"duplicate_entry_{filename}")
|
||
|
||
@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_existing_entries(dagger_client, filename):
    """Add entries for versions 3.3.3 and 3.3.2 and compare with ``existing_entries_<filename>``.

    NOTE(review): going by the test name, these rows are presumably already present in the
    fixture files - confirm against the fixtures.
    """
    additions = [
        ("3.3.3", "2024-01-26", 34573, "Adopt CDK v0.16.0"),
        ("3.3.2", "2024-01-24", 34465, "Check xmin only if user selects xmin sync mode."),
    ]
    changelog = get_changelog(filename)
    for version, day, pr_number, comment in additions:
        changelog.add_entry(semver.VersionInfo.parse(version), datetime.date.fromisoformat(day), pr_number, comment)
    check_result(changelog, f"existing_entries_{filename}")
|
||
|
||
@pytest.mark.parametrize("filename", ["no_changelog_header.md", "changelog_header_no_separator.md", "changelog_header_no_newline.md"])
def test_failure(dagger_client, filename):
    """Check that malformed fixtures raise ``ChangelogParsingException`` with the expected message.

    The expected message is read from the result file named like the fixture; a missing
    file is treated as empty expected text. When ``WRITE_TO_RESULT_FILE`` is set, the
    actual message is written to that file before the assertion.
    """
    # pytest.raises fails the test by itself when no exception is raised.
    with pytest.raises(ChangelogParsingException) as exc_info:
        get_changelog(filename)
    error_message = str(exc_info.value)

    result_filepath = PATH_TO_RESULT_FILES / filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "\n".join(difflib.unified_diff(expected_text.splitlines(), error_message.splitlines()))
    if WRITE_TO_RESULT_FILE:
        result_filepath.write_text(error_message)
    assert diff == ""
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Postgres | ||
|
||
Airbyte's certified Postgres connector offers the following features: | ||
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions. | ||
| Version | Date | Pull Request | Subject | | ||
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ||
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 | | ||
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. | | ||
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. | | ||
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | | |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Postgres | ||
|
||
Airbyte's certified Postgres connector offers the following features: | ||
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions. | ||
|
||
| Version | Date | Pull Request | Subject | | ||
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 | | ||
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. | | ||
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. | | ||
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | | |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Postgres | ||
|
||
Airbyte's certified Postgres connector offers the following features: | ||
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions. | ||
|
||
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ||
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 | | ||
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. | | ||
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. | | ||
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is exactly the approach I suggested before, thank you so much!