Skip to content

Commit

Permalink
extract out the connector changelog modification out of the bump_vers…
Browse files Browse the repository at this point in the history
…ion code
  • Loading branch information
stephane-airbyte committed Feb 2, 2024
1 parent 8e7196e commit 9338a2e
Show file tree
Hide file tree
Showing 23 changed files with 417 additions and 21 deletions.
1 change: 1 addition & 0 deletions airbyte-ci/connectors/pipelines/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,7 @@ E.G.: running `pytest` on a specific test folder:

| Version | PR | Description |
| ------- | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
| 3.11.0 | [#34586](https://github.com/airbytehq/airbyte/pull/34586) | Extract connector changelog modification logic into its own class |
| 3.10.0 | [#34606](https://github.com/airbytehq/airbyte/pull/34606) | Allow configuration of separate check URL to check whether package exists already. |
| 3.9.0 | [#34606](https://github.com/airbytehq/airbyte/pull/34606) | Allow configuration of python registry URL via environment variable. |
| 3.8.1 | [#34607](https://github.com/airbytehq/airbyte/pull/34607) | Improve gradle dependency cache volume protection. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pipelines.airbyte_ci.connectors.reports import ConnectorReport, Report
from pipelines.airbyte_ci.metadata.pipeline import MetadataValidation
from pipelines.helpers import git
from pipelines.helpers.changelog import Changelog
from pipelines.helpers.connectors import metadata_change_helpers
from pipelines.models.steps import Step, StepResult, StepStatus

Expand Down Expand Up @@ -39,14 +40,14 @@ def __init__(
context: ConnectorContext,
repo_dir: Container,
new_version: str,
changelog_entry: str,
comment: str,
pull_request_number: str,
) -> None:
super().__init__(context)
self.repo_dir = repo_dir
self.new_version = new_version
self.changelog_entry = changelog_entry
self.pull_request_number = pull_request_number
self.new_version = semver.VersionInfo.parse(new_version)
self.comment = comment
self.pull_request_number = int(pull_request_number)

async def _run(self) -> StepResult:
doc_path = self.context.connector.documentation_file_path
Expand All @@ -58,7 +59,10 @@ async def _run(self) -> StepResult:
output_artifact=self.repo_dir,
)
try:
updated_doc = self.add_changelog_entry(doc_path.read_text())
original_markdown = doc_path.read_text()
changelog = Changelog(original_markdown)
changelog.add_entry(self.new_version, datetime.date.today(), self.pull_request_number, self.comment)
updated_doc = changelog.to_markdown()
except Exception as e:
return StepResult(
self,
Expand All @@ -74,21 +78,6 @@ async def _run(self) -> StepResult:
output_artifact=updated_repo_dir,
)

def find_line_index_for_new_entry(self, markdown_text: str) -> int:
    """Return the line index at which a new changelog row should be inserted.

    The changelog header is the first line that mentions, case-insensitively,
    all of "version", "date", "pull request" and "subject". The returned index
    is two lines below it, i.e. just past the line that follows the header.

    Raises:
        Exception: if no line contains all four column names.
    """
    required_columns = ("version", "date", "pull request", "subject")
    for position, raw_line in enumerate(markdown_text.splitlines()):
        lowered = raw_line.lower()
        if all(column in lowered for column in required_columns):
            return position + 2
    raise Exception("Could not find the changelog section table in the documentation file.")

def add_changelog_entry(self, og_doc_content: str) -> str:
    """Insert a changelog row for ``self.new_version`` into the documentation text.

    The row is dated today, links to ``self.pull_request_number`` in the
    airbytehq/airbyte repository and carries ``self.changelog_entry`` as its
    subject. It is placed at the index given by
    ``find_line_index_for_new_entry``.

    Returns:
        The updated document, newline-joined with a single trailing newline.
    """
    insert_at = self.find_line_index_for_new_entry(og_doc_content)
    pr_number = self.pull_request_number
    stamp = datetime.date.today().strftime("%Y-%m-%d")
    row = f"| {self.new_version} | {stamp} | [{pr_number}](https://github.com/airbytehq/airbyte/pull/{pr_number}) | {self.changelog_entry} |"
    doc_lines = og_doc_content.splitlines()
    doc_lines.insert(insert_at, row)
    return "\n".join(doc_lines) + "\n"


class BumpDockerImageTagInMetadata(Step):
context: ConnectorContext
Expand Down
121 changes: 121 additions & 0 deletions airbyte-ci/connectors/pipelines/pipelines/helpers/changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import datetime
import re
from dataclasses import dataclass
from operator import attrgetter
from pathlib import Path

import semver
from pipelines.helpers.github import AIRBYTE_GITHUB_REPO
from typing_extensions import Set


class ChangelogParsingException(Exception):
    """Raised when the changelog table of a documentation file cannot be parsed."""


@dataclass(frozen=True)
class ChangelogEntry:
    """One row of a connector changelog table.

    Equality and hashing are both derived from the same tuple of the four
    fields, so entries that compare equal always hash equal and can be
    de-duplicated by storing them in a set.
    """

    date: datetime.date
    version: semver.Version
    pr_number: int
    comment: str

    def _key(self) -> tuple:
        """Return the field tuple shared by ``__eq__`` and ``__hash__``."""
        return (self.date, self.version, self.pr_number, self.comment)

    def to_markdown(self, github_repo: str = AIRBYTE_GITHUB_REPO) -> str:
        """Render the entry as a markdown table row.

        Args:
            github_repo: Repository path inserted after ``https://github.com/``
                when building the pull request link.

        Returns:
            The table row, without a trailing newline.
        """
        day = self.date.strftime("%Y-%m-%d")
        pr_link = f"[{self.pr_number}](https://github.com/{github_repo}/pull/{self.pr_number})"
        return f"| {self.version} | {day} | {pr_link} | {self.comment} |"

    def __str__(self) -> str:
        day = self.date.strftime("%Y-%m-%d")
        return f"version={self.version}, date={day}, pr_number={self.pr_number}, comment={self.comment}"

    def __repr__(self) -> str:
        return "ChangelogEntry: " + str(self)

    def __eq__(self, other: object) -> bool:
        # Anything that is not a ChangelogEntry is simply "not equal".
        if not isinstance(other, ChangelogEntry):
            return False
        return self._key() == other._key()

    # No explicit __ne__: Python 3 derives it by negating __eq__.

    def __hash__(self) -> int:
        # Hash the same values __eq__ compares. Hashing the string form would
        # include details (e.g. semver build metadata) that equality ignores.
        return hash(self._key())


def _parse_changelog_table(markdown_lines: list[str], github_repo: str) -> tuple[int, int, set[ChangelogEntry]]:
    """Locate the changelog table and parse the run of entry rows that follows its header.

    Args:
        markdown_lines: The documentation file, one element per line.
        github_repo: Repository path expected in the pull request links
            (the part after ``https://github.com/``).

    Returns:
        ``(start_index, row_count, entries)`` where ``start_index`` is the index of
        the first line after the header delimiter, ``row_count`` is the number of
        consecutive lines from ``start_index`` that parsed as entries, and
        ``entries`` is the de-duplicated set of those entries. ``row_count`` can be
        larger than ``len(entries)`` when the table contains duplicate rows.

    Raises:
        ChangelogParsingException: if the header is missing, is not preceded by a
            blank line, or is not followed by a delimiter row.
    """
    header_pattern = re.compile(r"\| *Version *\| *Date *\| *Pull Request *\| *Subject *\|")
    entry_pattern = re.compile(
        r"^\| *(?P<version>[0-9]+\.[0-9]+\.[0-9]+?) *\| *"
        r"(?P<day>[0-9]{4}-[0-9]{2}-[0-9]{2}) *\| *"
        r"\[?(?P<pr_number1>[0-9]+)\]? ?\(https://github\.com/"
        + re.escape(github_repo)
        + r"/pull/(?P<pr_number2>[0-9]+)\) *\| *"
        r"(?P<comment>[^ ](?:.*[^ ])?) *\| *$"
    )

    header_index = next((index for index, line in enumerate(markdown_lines) if header_pattern.search(line)), None)
    if header_index is None:
        raise ChangelogParsingException("Could not find the changelog section table in the documentation file.")
    # A header on the very first line has no preceding line to check; indexing
    # with -1 would wrongly inspect the last line of the file instead.
    if header_index > 0 and markdown_lines[header_index - 1] != "":
        raise ChangelogParsingException(
            "Found changelog section table in the documentation file at line but there is not blank line before it."
        )
    delimiter_index = header_index + 1
    # Accept optional padding and alignment colons around the dashes; also
    # guard against the header being the last line of the file.
    if delimiter_index >= len(markdown_lines) or not re.search(r"(\| *:?-*:? *){4}\|", markdown_lines[delimiter_index]):
        raise ChangelogParsingException("The changelog table in the documentation file is missing the header delimiter.")
    start_index = header_index + 2

    entries: set[ChangelogEntry] = set()
    row_count = 0
    for line in markdown_lines[start_index:]:
        row = entry_pattern.search(line)
        # The first line that is not a well-formed entry (or whose link text and
        # link target disagree on the PR number) ends the parsed part of the table.
        if row is None or row["pr_number1"] != row["pr_number2"]:
            break
        entries.add(
            ChangelogEntry(
                datetime.date.fromisoformat(row["day"]),
                semver.VersionInfo.parse(row["version"]),
                int(row["pr_number1"]),
                row["comment"],
            )
        )
        row_count += 1

    return start_index, row_count, entries


def parse_markdown(markdown_lines: list[str], github_repo: str) -> tuple[int, set[ChangelogEntry]]:
    """Parse the changelog table of a documentation file.

    Args:
        markdown_lines: The documentation file, one element per line.
        github_repo: Repository path expected in the pull request links.

    Returns:
        ``(start_index, entries)``: the index of the first entry line and the set
        of entries parsed from the consecutive rows starting there.

    Raises:
        ChangelogParsingException: if the changelog table cannot be located or its
            header is malformed.
    """
    start_index, _row_count, entries = _parse_changelog_table(markdown_lines, github_repo)
    return start_index, entries


class Changelog:
    """In-memory view of a documentation file's changelog that can receive new entries."""

    def __init__(self, markdown: str, github_repo: str = AIRBYTE_GITHUB_REPO) -> None:
        """Parse ``markdown`` and remember where its changelog entries live.

        Raises:
            ChangelogParsingException: if the changelog table cannot be parsed.
        """
        self.original_markdown_lines = markdown.splitlines()
        (
            self.changelog_entries_start_line_index,
            self._original_row_count,
            self.original_entries,
        ) = _parse_changelog_table(self.original_markdown_lines, github_repo)
        self.new_entries: set[ChangelogEntry] = set()
        self.github_repo = github_repo

    def add_entry(self, version: semver.Version, date: datetime.date, pull_request_number: int, comment: str) -> None:
        """Register a new entry; adding an entry equal to an existing one is a no-op."""
        self.new_entries.add(ChangelogEntry(date, version, pull_request_number, comment))

    def to_markdown(self) -> str:
        """Render the document with original and new entries merged.

        Entries are ordered by version, then date, then pull request number, then
        comment, all descending. Lines outside the parsed rows are kept as they were.

        Returns:
            The full document, newline-joined with a single trailing newline.
        """
        merged_entries = sorted(
            self.original_entries | self.new_entries,
            key=attrgetter("version", "date", "pr_number", "comment"),
            reverse=True,
        )
        # Skip every original row that was parsed, not just the number of distinct
        # entries; otherwise duplicate rows would leave stale lines behind.
        rows_end = self.changelog_entries_start_line_index + self._original_row_count
        new_lines = (
            self.original_markdown_lines[: self.changelog_entries_start_line_index]
            + [entry.to_markdown(self.github_repo) for entry in merged_entries]
            + self.original_markdown_lines[rows_end:]
        )
        return "\n".join(new_lines) + "\n"
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/pipelines/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "pipelines"
version = "3.10.0"
version = "3.11.0"
description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <contact@airbyte.io>"]

Expand Down
102 changes: 102 additions & 0 deletions airbyte-ci/connectors/pipelines/tests/test_changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import datetime
import difflib
from pathlib import Path

import pytest
import semver
from pipelines.helpers.changelog import Changelog, ChangelogParsingException

pytestmark = [
pytest.mark.anyio,
]

PATH_TO_INITIAL_FILES = Path("airbyte-ci/connectors/pipelines/tests/test_changelog/initial_files")
PATH_TO_RESULT_FILES = Path("airbyte-ci/connectors/pipelines/tests/test_changelog/result_files")
WRITE_TO_RESULT_FILE = False


def check_result(changelog: Changelog, result_filename: str) -> None:
    """Assert that the rendered changelog matches the expected result file.

    A missing result file is treated as empty expected text. When
    ``WRITE_TO_RESULT_FILE`` is set, the rendered markdown is written to the
    result file before the assertion runs, so the file is refreshed even when
    the comparison fails.
    """
    markdown = changelog.to_markdown()
    result_filepath = PATH_TO_RESULT_FILES / result_filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "".join(difflib.unified_diff(expected_text.splitlines(keepends=True), markdown.splitlines(keepends=True)))
    if WRITE_TO_RESULT_FILE:
        # write_text opens and closes the file itself, even if the write fails.
        result_filepath.write_text(markdown)
    assert diff == ""


def get_changelog(filename: str) -> Changelog:
    """Build a ``Changelog`` from the named file under ``PATH_TO_INITIAL_FILES``."""
    # read_text closes the file handle, unlike a bare open(...).read().
    return Changelog((PATH_TO_INITIAL_FILES / filename).read_text())


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_single_insert(dagger_client, filename):
    """Adding one new entry produces the expected ``single_insert_*`` output."""
    version = semver.VersionInfo.parse("3.4.0")
    release_date = datetime.date.fromisoformat("2024-03-01")
    changelog = get_changelog(filename)
    changelog.add_entry(version, release_date, 123456, "test")
    check_result(changelog, f"single_insert_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_versions(dagger_client, filename):
    """Two entries sharing a version but differing in date, PR and comment are both rendered."""
    version = semver.VersionInfo.parse("3.4.0")
    additions = [
        (datetime.date.fromisoformat("2024-03-01"), 123456, "test1"),
        (datetime.date.fromisoformat("2024-03-02"), 123457, "test2"),
    ]
    changelog = get_changelog(filename)
    for release_date, pr_number, comment in additions:
        changelog.add_entry(version, release_date, pr_number, comment)
    check_result(changelog, f"dupicate_versions_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_version_date(dagger_client, filename):
    """Two entries sharing version and date but differing in PR and comment are both rendered."""
    version = semver.VersionInfo.parse("3.4.0")
    release_date = datetime.date.fromisoformat("2024-03-01")
    changelog = get_changelog(filename)
    for pr_number, comment in ((123456, "test1"), (123457, "test2")):
        changelog.add_entry(version, release_date, pr_number, comment)
    check_result(changelog, f"dupicate_version_date_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_duplicate_entries(dagger_client, filename):
    """Adding the exact same entry twice is checked against the ``duplicate_entry_*`` output."""
    version = semver.VersionInfo.parse("3.4.0")
    release_date = datetime.date.fromisoformat("2024-03-01")
    changelog = get_changelog(filename)
    for _ in range(2):
        changelog.add_entry(version, release_date, 123456, "test")
    check_result(changelog, f"duplicate_entry_{filename}")


@pytest.mark.parametrize("filename", ["valid_changelog_at_end.md", "valid_changelog_in_middle.md"])
def test_insert_existing_entries(dagger_client, filename):
    """Re-adding entries is checked against the ``existing_entries_*`` output.

    NOTE(review): the two entries are presumably already present in the
    initial fixture files - confirm against the fixtures.
    """
    additions = [
        ("3.3.3", "2024-01-26", 34573, "Adopt CDK v0.16.0"),
        ("3.3.2", "2024-01-24", 34465, "Check xmin only if user selects xmin sync mode."),
    ]
    changelog = get_changelog(filename)
    for version_text, day_text, pr_number, comment in additions:
        changelog.add_entry(
            semver.VersionInfo.parse(version_text),
            datetime.date.fromisoformat(day_text),
            pr_number,
            comment,
        )
    check_result(changelog, f"existing_entries_{filename}")


@pytest.mark.parametrize("filename", ["no_changelog_header.md", "changelog_header_no_separator.md", "changelog_header_no_newline.md"])
def test_failure(dagger_client, filename):
    """Parsing a malformed changelog raises ``ChangelogParsingException`` with the expected message.

    The expected message is read from the result file of the same name; a
    missing result file is treated as empty expected text. When
    ``WRITE_TO_RESULT_FILE`` is set, the actual message is written to the
    result file before the assertion runs.
    """
    # pytest.raises fails the test with a clear report when nothing is raised,
    # replacing the try / assert False / except pattern.
    with pytest.raises(ChangelogParsingException) as exc_info:
        get_changelog(filename)
    message = str(exc_info.value)

    result_filepath = PATH_TO_RESULT_FILES / filename
    expected_text = result_filepath.read_text() if result_filepath.exists() else ""
    diff = "\n".join(difflib.unified_diff(expected_text.splitlines(), message.splitlines()))
    if WRITE_TO_RESULT_FILE:
        # write_text opens and closes the file itself, even if the write fails.
        result_filepath.write_text(message)
    assert diff == ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.
| Version | Date | Pull Request | Subject |
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.

| Version | Date | Pull Request | Subject |
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Postgres

Airbyte's certified Postgres connector offers the following features:
* Replicate data from tables, views and materilized views. Other data objects won't be replicated to the destination like indexes, permissions.

|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3.3.3 | 2024-01-26 | [34573](https://github.com/airbytehq/airbyte/pull/34573) | Adopt CDK v0.16.0 |
| 3.3.2 | 2024-01-24 | [34465](https://github.com/airbytehq/airbyte/pull/34465) | Check xmin only if user selects xmin sync mode. |
| 3.3.1 | 2024-01-10 | [34119](https://github.com/airbytehq/airbyte/pull/34119) | Adopt java CDK version 0.11.5. |
| 3.3.0 | 2023-12-19 | [33437](https://github.com/airbytehq/airbyte/pull/33437) | Remove LEGACY state flag | |
Loading

0 comments on commit 9338a2e

Please sign in to comment.