Skip to content

Commit

Permalink
extract out the connector changelog modification out of the bump_vers…
Browse files Browse the repository at this point in the history
…ion code (#34586)

We want to move the changelog mutation out of the connectors command so that we can reuse it for the Java CDK.
All connectors were tested, and the only ones that were failing are fixed in #36039.
  • Loading branch information
stephane-airbyte authored Mar 14, 2024
1 parent 1d11ceb commit 3fae982
Show file tree
Hide file tree
Showing 23 changed files with 418 additions and 25 deletions.
1 change: 1 addition & 0 deletions airbyte-ci/connectors/pipelines/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ E.G.: running Poe tasks on the modified internal packages of the current branch:

| Version | PR | Description |
| ------- | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
| 4.5.3 | [#34586](https://github.com/airbytehq/airbyte/pull/34586) | Extract connector changelog modification logic into its own class |
| 4.5.2 | [#35802](https://github.com/airbytehq/airbyte/pull/35802) | Fix bug with connectors bump_version command |
| 4.5.1 | [#35786](https://github.com/airbytehq/airbyte/pull/35786) | Declare `live_tests` as an internal poetry package. |
| 4.5.0 | [#35784](https://github.com/airbytehq/airbyte/pull/35784) | Format command supports kotlin |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pipelines.airbyte_ci.connectors.reports import ConnectorReport, Report
from pipelines.airbyte_ci.metadata.pipeline import MetadataValidation
from pipelines.helpers import git
from pipelines.helpers.changelog import Changelog
from pipelines.helpers.connectors import metadata_change_helpers
from pipelines.models.steps import Step, StepResult, StepStatus

Expand Down Expand Up @@ -39,14 +40,14 @@ def __init__(
context: ConnectorContext,
repo_dir: Container,
new_version: str,
changelog_entry: str,
comment: str,
pull_request_number: str,
) -> None:
super().__init__(context)
self.repo_dir = repo_dir
self.new_version = new_version
self.changelog_entry = changelog_entry
self.pull_request_number = pull_request_number
self.new_version = semver.VersionInfo.parse(new_version)
self.comment = comment
self.pull_request_number = int(pull_request_number)

async def _run(self) -> StepResult:
doc_path = self.context.connector.documentation_file_path
Expand All @@ -58,13 +59,13 @@ async def _run(self) -> StepResult:
output=self.repo_dir,
)
try:
updated_doc = self.add_changelog_entry(doc_path.read_text())
original_markdown = doc_path.read_text()
changelog = Changelog(original_markdown)
changelog.add_entry(self.new_version, datetime.date.today(), self.pull_request_number, self.comment)
updated_doc = changelog.to_markdown()
except Exception as e:
return StepResult(
step=self,
status=StepStatus.FAILURE,
stdout=f"Could not add changelog entry: {e}",
output=self.repo_dir,
step=self, status=StepStatus.FAILURE, stderr=f"Could not add changelog entry: {e}", output=self.repo_dir, exc_info=e
)
updated_repo_dir = self.repo_dir.with_new_file(str(doc_path), contents=updated_doc)
return StepResult(
Expand All @@ -74,21 +75,6 @@ async def _run(self) -> StepResult:
output=updated_repo_dir,
)

def find_line_index_for_new_entry(self, markdown_text: str) -> int:
lines = markdown_text.splitlines()
for line_index, line in enumerate(lines):
if "version" in line.lower() and "date" in line.lower() and "pull request" in line.lower() and "subject" in line.lower():
return line_index + 2
raise Exception("Could not find the changelog section table in the documentation file.")

def add_changelog_entry(self, og_doc_content: str) -> str:
today = datetime.date.today().strftime("%Y-%m-%d")
lines = og_doc_content.splitlines()
line_index_for_new_entry = self.find_line_index_for_new_entry(og_doc_content)
new_entry = f"| {self.new_version} | {today} | [{self.pull_request_number}](https://github.com/airbytehq/airbyte/pull/{self.pull_request_number}) | {self.changelog_entry} |"
lines.insert(line_index_for_new_entry, new_entry)
return "\n".join(lines) + "\n"


class BumpDockerImageTagInMetadata(Step):
context: ConnectorContext
Expand Down
121 changes: 121 additions & 0 deletions airbyte-ci/connectors/pipelines/pipelines/helpers/changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import datetime
import re
from dataclasses import dataclass
from operator import attrgetter
from typing import Set, Tuple

import semver
from pipelines.helpers.github import AIRBYTE_GITHUB_REPO


class ChangelogParsingException(Exception):
    """Raised when the changelog table cannot be located or validated in a markdown document."""


@dataclass(frozen=True)
class ChangelogEntry:
    """One row of a changelog table.

    Instances are immutable and hashable, so they can be stored in sets. Two entries are
    equal when their date, version, pull request number and comment are all equal.
    """

    date: datetime.date
    version: semver.Version
    pr_number: int
    comment: str

    def to_markdown(self, github_repo: str = AIRBYTE_GITHUB_REPO) -> str:
        """Render the entry as a markdown table row.

        Args:
            github_repo: ``owner/name`` of the GitHub repository the pull request link points to.

        Returns:
            A ``| version | date | [pr](url) | comment |`` row, without a trailing newline.
        """
        day = self.date.strftime("%Y-%m-%d")
        return f"| {self.version} | {day} | [{self.pr_number}](https://github.com/{github_repo}/pull/{self.pr_number}) | {self.comment} |"

    def _as_tuple(self) -> tuple:
        """Return the fields that define the identity of this entry."""
        return (self.date, self.version, self.pr_number, self.comment)

    def __str__(self) -> str:
        day = self.date.strftime("%Y-%m-%d")
        return f"version={self.version}, date={day}, pr_number={self.pr_number}, comment={self.comment}"

    def __repr__(self) -> str:
        return f"ChangelogEntry: {self}"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ChangelogEntry):
            # Let Python try the reflected comparison; `entry == other` still evaluates to False.
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __hash__(self) -> int:
        # Hash the same fields __eq__ compares so that equal entries always share a hash.
        return hash(self._as_tuple())


def parse_markdown(markdown_lines: list[str], github_repo: str) -> Tuple[int, Set[ChangelogEntry]]:
    """Find the changelog table in a markdown document and parse its leading entries.

    The table is identified by a ``| Version | Date | Pull Request | Subject |`` header row,
    which must be followed by a delimiter row. Rows after the delimiter are parsed until the
    first line that is not a well-formed entry (or whose two pull request numbers disagree).

    Args:
        markdown_lines: The document, already split into lines.
        github_repo: ``owner/name`` of the GitHub repository that pull request links must target.

    Returns:
        A tuple of the index of the first entry line (header index + 2) and the set of parsed
        entries. Because a set is returned, identical rows collapse into a single entry.

    Raises:
        ChangelogParsingException: If the header row is not found, if the line preceding the
            header is not empty, or if the delimiter row is missing.
    """
    # github_repo is escaped so that it is matched literally rather than as a regex fragment.
    changelog_entry_re = re.compile(
        r"^\| *(?P<version>[0-9]+\.[0-9]+\.[0-9]+) *\| *"
        r"(?P<day>[0-9]{4}-[0-9]{2}-[0-9]{2}) *\| *"
        r"\[?(?P<pr_number1>[0-9]+)\]? ?\(https://github\.com/"
        + re.escape(github_repo)
        + r"/pull/(?P<pr_number2>[0-9]+)\) *\| *"
        r"(?P<comment>[^ ].*[^ ]) *\| *$"
    )
    header_re = re.compile(r"\| *Version *\| *Date *\| *Pull Request *\| *Subject *\|")
    delimiter_re = re.compile(r"(\|[- :]*){4}\|")

    changelog_header_line_index = next(
        (line_index for line_index, line in enumerate(markdown_lines) if header_re.search(line)),
        -1,
    )
    if changelog_header_line_index == -1:
        raise ChangelogParsingException("Could not find the changelog section table in the documentation file.")
    # A header on the very first line has no preceding line to check; index -1 must not wrap
    # around to the last line of the document.
    if changelog_header_line_index > 0 and markdown_lines[changelog_header_line_index - 1] != "":
        raise ChangelogParsingException(
            "Found changelog section table in the documentation file at line but there is not blank line before it."
        )
    delimiter_line_index = changelog_header_line_index + 1
    # A header on the last line has no delimiter row at all: report it as a parsing error
    # instead of letting an unrelated exception escape.
    if delimiter_line_index >= len(markdown_lines) or not delimiter_re.search(markdown_lines[delimiter_line_index]):
        raise ChangelogParsingException("The changelog table in the documentation file is missing the header delimiter.")
    changelog_entries_start_line_index = changelog_header_line_index + 2

    # Consume consecutive well-formed rows; the first line that is not one ends the table.
    entries: Set[ChangelogEntry] = set()
    for line in markdown_lines[changelog_entries_start_line_index:]:
        match = changelog_entry_re.search(line)
        if not match or match.group("pr_number1") != match.group("pr_number2"):
            break
        entries.add(
            ChangelogEntry(
                datetime.date.fromisoformat(match.group("day")),
                semver.VersionInfo.parse(match.group("version")),
                int(match.group("pr_number1")),
                match.group("comment"),
            )
        )

    return changelog_entries_start_line_index, entries


class Changelog:
    """Changelog table of a markdown document, to which new entries can be added.

    The document is parsed once at construction time; entries added afterwards are merged
    with the parsed ones when the document is rendered back with ``to_markdown``.
    """

    def __init__(self, markdown: str, github_repo: str = AIRBYTE_GITHUB_REPO) -> None:
        """Parse ``markdown`` and remember where its changelog entries start.

        Args:
            markdown: Full text of the markdown document.
            github_repo: ``owner/name`` of the GitHub repository used for pull request links.

        Raises:
            ChangelogParsingException: Propagated from ``parse_markdown`` when the changelog
                table cannot be found or is malformed.
        """
        self.original_markdown_lines = markdown.splitlines()
        self.changelog_entries_start_line_index, self.original_entries = parse_markdown(self.original_markdown_lines, github_repo)
        self.new_entries: Set[ChangelogEntry] = set()
        self.github_repo = github_repo

    def add_entry(self, version: semver.Version, date: datetime.date, pull_request_number: int, comment: str) -> None:
        """Register a new entry; adding an entry identical to an existing one has no effect."""
        self.new_entries.add(ChangelogEntry(date, version, pull_request_number, comment))

    def to_markdown(self) -> str:
        """Render the document with original and new entries merged.

        Entries are ordered by descending version, then date, then pull request number, then
        comment. The lines before and after the original entries are kept unchanged.

        Returns:
            The full markdown text, terminated by a newline.
        """
        all_entries = self.original_entries | self.new_entries
        # A single sort on a tuple key gives the same order as chaining one stable sort per field.
        sorted_entries = sorted(all_entries, key=attrgetter("version", "date", "pr_number", "comment"), reverse=True)
        entries_start = self.changelog_entries_start_line_index
        entries_end = entries_start + len(self.original_entries)
        new_lines = [
            *self.original_markdown_lines[:entries_start],
            *(entry.to_markdown(self.github_repo) for entry in sorted_entries),
            *self.original_markdown_lines[entries_end:],
        ]
        return "\n".join(new_lines) + "\n"
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/pipelines/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "pipelines"
version = "4.5.2"
version = "4.5.3"
description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <contact@airbyte.io>"]

Expand Down
102 changes: 102 additions & 0 deletions airbyte-ci/connectors/pipelines/tests/test_changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import datetime
import difflib
from pathlib import Path

import pytest
import semver
from pipelines.helpers.changelog import Changelog, ChangelogParsingException

# Module-level marker list: applies the `anyio` mark to every test in this file.
pytestmark = [
    pytest.mark.anyio,
]

# Fixture directories are resolved relative to this test file so that the tests do not
# depend on the directory pytest is launched from.
_FIXTURES_DIR = Path(__file__).parent / "test_changelog"
PATH_TO_INITIAL_FILES = _FIXTURES_DIR / "initial_files"
PATH_TO_RESULT_FILES = _FIXTURES_DIR / "result_files"
# Set to True locally to (re)generate the expected result files from the current output.
WRITE_TO_RESULT_FILE = False


def check_result(changelog: Changelog, result_filename: str):
    """Assert that the rendered changelog matches the stored expected result file.

    A missing result file is treated as empty expected text. When ``WRITE_TO_RESULT_FILE`` is
    set, the rendered markdown is written to the result file before the assertion runs.

    Args:
        changelog: The changelog to render.
        result_filename: Name of the expected file inside ``PATH_TO_RESULT_FILES``.
    """
    markdown = changelog.to_markdown()
    result_filepath = PATH_TO_RESULT_FILES / result_filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "".join(difflib.unified_diff(expected_text.splitlines(keepends=True), markdown.splitlines(keepends=True)))
    if WRITE_TO_RESULT_FILE:
        # write_text opens and closes the file itself, so no handle is leaked on error.
        result_filepath.write_text(markdown)
    assert diff == ""


def get_changelog(filename: str) -> Changelog:
    """Build a ``Changelog`` from the named file in ``PATH_TO_INITIAL_FILES``."""
    filepath = PATH_TO_INITIAL_FILES / filename
    # read_text closes the file once read, unlike a bare open(...).read().
    return Changelog(filepath.read_text())


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_single_insert(dagger_client, filename):
    """Adding one new entry produces the expected rendered changelog."""
    changelog = get_changelog(filename)
    new_version = semver.VersionInfo.parse("3.4.0")
    release_date = datetime.date.fromisoformat("2024-03-01")
    changelog.add_entry(new_version, release_date, 123456, "test")
    check_result(changelog, f"single_insert_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_versions(dagger_client, filename):
    """Two entries sharing a version but differing in date, PR and comment are both rendered."""
    changelog = get_changelog(filename)
    additions = (
        ("2024-03-01", 123456, "test1"),
        ("2024-03-02", 123457, "test2"),
    )
    for day, pr_number, comment in additions:
        changelog.add_entry(semver.VersionInfo.parse("3.4.0"), datetime.date.fromisoformat(day), pr_number, comment)
    check_result(changelog, f"dupicate_versions_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_version_date(dagger_client, filename):
    """Two entries sharing version and date but differing in PR and comment are both rendered."""
    changelog = get_changelog(filename)
    additions = (
        (123456, "test1"),
        (123457, "test2"),
    )
    for pr_number, comment in additions:
        changelog.add_entry(semver.VersionInfo.parse("3.4.0"), datetime.date.fromisoformat("2024-03-01"), pr_number, comment)
    check_result(changelog, f"dupicate_version_date_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_entries(dagger_client, filename):
    """Adding the exact same entry twice is checked against the stored expected result."""
    changelog = get_changelog(filename)
    for _ in range(2):
        changelog.add_entry(semver.VersionInfo.parse("3.4.0"), datetime.date.fromisoformat("2024-03-01"), 123456, "test")
    check_result(changelog, f"duplicate_entry_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_existing_entries(dagger_client, filename):
    """Adding entries for versions 3.3.3 and 3.3.2 is checked against the stored expected result."""
    changelog = get_changelog(filename)
    already_present = (
        ("3.3.3", "2024-01-26", 34573, "Adopt CDK v0.16.0"),
        ("3.3.2", "2024-01-24", 34465, "Check xmin only if user selects xmin sync mode."),
    )
    for version, day, pr_number, comment in already_present:
        changelog.add_entry(semver.VersionInfo.parse(version), datetime.date.fromisoformat(day), pr_number, comment)
    check_result(changelog, f"existing_entries_{filename}")


@pytest.mark.parametrize("filename", ["no_changelog_header.md", "changelog_header_no_separator.md", "changelog_header_no_newline.md"])
def test_failure(dagger_client, filename):
    """Malformed documents raise ChangelogParsingException with the expected message.

    The expected message is read from the result file of the same name; a missing result file
    is treated as empty expected text. When ``WRITE_TO_RESULT_FILE`` is set, the actual message
    is written to the result file before the assertion runs.
    """
    # pytest.raises fails the test with a clear message when no exception is raised.
    with pytest.raises(ChangelogParsingException) as exc_info:
        get_changelog(filename)
    error_message = str(exc_info.value)

    result_filepath = PATH_TO_RESULT_FILES / filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "\n".join(difflib.unified_diff(expected_text.splitlines(), error_message.splitlines()))
    if WRITE_TO_RESULT_FILE:
        # write_text opens and closes the file itself, so no handle is leaked on error.
        result_filepath.write_text(error_message)
    assert diff == ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.
| Version | Date | Pull Request | Subject |
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.

| Version | Date | Pull Request | Subject |
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.

|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Loading

0 comments on commit 3fae982

Please sign in to comment.