@property
def connector_spec_file_content(self) -> Optional[dict]:
    """
    Return the connector spec read statically from the connector source files, if any.

    The spec source of truth is the actual output of the spec command, as connectors can mutate their spec.
    This is a best-effort approach at fetching a spec without running the command on the connector,
    which is "good enough" in some cases.

    Lookup order: `spec.yaml`, then `spec.json` (both in `python_source_dir_path`), then the `spec` key
    of the manifest file at `manifest_path`.

    Returns:
        The parsed spec, or None when no spec file exists or the manifest does not declare a spec.
    """
    source_dir = Path(self.python_source_dir_path)
    yaml_spec = source_dir / "spec.yaml"
    json_spec = source_dir / "spec.json"

    if yaml_spec.exists():
        return yaml.safe_load(yaml_spec.read_text())
    if json_spec.exists():
        return json.loads(json_spec.read_text())
    if self.manifest_path.exists():
        manifest = yaml.safe_load(self.manifest_path.read_text())
        # An empty manifest parses to None and a manifest may omit "spec":
        # both mean "no static spec available", not an error.
        return manifest.get("spec") if isinstance(manifest, dict) else None

    return None
b/airbyte-ci/connectors/connectors_qa/README.md index e1c81afd5186f..c81cb682f944f 100644 --- a/airbyte-ci/connectors/connectors_qa/README.md +++ b/airbyte-ci/connectors/connectors_qa/README.md @@ -108,6 +108,12 @@ poe lint ## Changelog +### 1.7.0 + +Added `CheckDocumentationLinks`, `CheckDocumentationHeadersOrder`, `CheckPrerequisitesSectionDescribesRequiredFieldsFromSpec`, +`CheckSourceSectionContent`, `CheckForAirbyteCloudSectionContent`, `CheckForAirbyteOpenSectionContent`, `CheckSupportedSyncModesSectionContent`, +`CheckTutorialsSectionContent`, `CheckChangelogSectionContent` checks that verify that the documentation file follows the standard template. + ### 1.6.0 Added `manifest-only` connectors support — they will run basic assets and metadata checks. diff --git a/airbyte-ci/connectors/connectors_qa/pyproject.toml b/airbyte-ci/connectors/connectors_qa/pyproject.toml index 7653c912c2dfa..8a3d93eae83a4 100644 --- a/airbyte-ci/connectors/connectors_qa/pyproject.toml +++ b/airbyte-ci/connectors/connectors_qa/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "connectors-qa" -version = "1.6.0" +version = "1.7.0" description = "A package to run QA checks on Airbyte connectors, generate reports and documentation." authors = ["Airbyte "] readme = "README.md" diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation.py b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation.py deleted file mode 100644 index 6e6a0f2908290..0000000000000 --- a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- -import textwrap -from typing import List - -from connector_ops.utils import Connector # type: ignore -from connectors_qa import consts -from connectors_qa.models import Check, CheckCategory, CheckResult -from pydash.objects import get # type: ignore - - -class DocumentationCheck(Check): - category = CheckCategory.DOCUMENTATION - - -class CheckMigrationGuide(DocumentationCheck): - name = "Breaking changes must be accompanied by a migration guide" - description = "When a breaking change is introduced, we check that a migration guide is available. It should be stored under `./docs/integrations/s/-migrations.md`.\nThis document should contain a section for each breaking change, in order of the version descending. It must explain users which action to take to migrate to the new version." - - def _run(self, connector: Connector) -> CheckResult: - breaking_changes = get(connector.metadata, "releases.breakingChanges") - if not breaking_changes: - return self.create_check_result( - connector=connector, - passed=True, - message="No breaking changes found. A migration guide is not required", - ) - migration_guide_file_path = connector.migration_guide_file_path - migration_guide_exists = migration_guide_file_path is not None and migration_guide_file_path.exists() - if not migration_guide_exists: - return self.create_check_result( - connector=connector, - passed=False, - message=f"Migration guide file is missing for {connector.technical_name}. 
Please create a migration guide in ./docs/integrations/s/-migrations.md`", - ) - - expected_title = f"# {connector.name_from_metadata} Migration Guide" - expected_version_header_start = "## Upgrading to " - migration_guide_content = migration_guide_file_path.read_text() - try: - first_line = migration_guide_content.splitlines()[0] - except IndexError: - first_line = migration_guide_content - if not first_line == expected_title: - return self.create_check_result( - connector=connector, - passed=False, - message=f"Migration guide file for {connector.technical_name} does not start with the correct header. Expected '{expected_title}', got '{first_line}'", - ) - - # Check that the migration guide contains a section for each breaking change key ## Upgrading to {version} - # Note that breaking change is a dict where the version is the key - # Note that the migration guide must have the sections in order of the version descending - # 3.0.0, 2.0.0, 1.0.0, etc - # This means we have to record the headings in the migration guide and then check that they are in order - # We also have to check that the headings are in the breaking changes dict - ordered_breaking_changes = sorted(breaking_changes.keys(), reverse=True) - ordered_expected_headings = [f"{expected_version_header_start}{version}" for version in ordered_breaking_changes] - - ordered_heading_versions = [] - for line in migration_guide_content.splitlines(): - stripped_line = line.strip() - if stripped_line.startswith(expected_version_header_start): - version = stripped_line.replace(expected_version_header_start, "") - ordered_heading_versions.append(version) - - if ordered_breaking_changes != ordered_heading_versions: - return self.create_check_result( - connector=connector, - passed=False, - message=textwrap.dedent( - f""" - Migration guide file for {connector.name_from_metadata} has incorrect version headings. - Check for missing, extra, or misordered headings, or headers with typos. 
- Expected headings: {ordered_expected_headings} - """ - ), - ) - return self.create_check_result( - connector=connector, - passed=True, - message="The migration guide is correctly templated", - ) - - -class CheckDocumentationExists(DocumentationCheck): - name = "Connectors must have user facing documentation" - description = ( - "The user facing connector documentation should be stored under `./docs/integrations/s/.md`." - ) - - def _run(self, connector: Connector) -> CheckResult: - if not connector.documentation_file_path or not connector.documentation_file_path.exists(): - return self.fail( - connector=connector, - message="User facing documentation file is missing. Please create it under ./docs/integrations/s/.md", - ) - return self.pass_( - connector=connector, - message=f"User facing documentation file {connector.documentation_file_path} exists", - ) - - -class CheckDocumentationStructure(DocumentationCheck): - name = "Connectors documentation follows our guidelines" - description = f"The user facing connector documentation should follow the guidelines defined in the [documentation standards]({consts.DOCUMENTATION_STANDARDS_URL})." - - expected_sections = [ - "## Prerequisites", - "## Setup guide", - "## Supported sync modes", - "## Supported streams", - "## Changelog", - ] - - def check_main_header(self, connector: Connector, doc_lines: List[str]) -> List[str]: - errors = [] - if not doc_lines[0].lower().startswith(f"# {connector.metadata['name']}".lower()): - errors.append( - f"The connector name is not used as the main header in the documentation. 
Expected: '# {connector.metadata['name']}'" - ) - return errors - - def check_sections(self, doc_lines: List[str]) -> List[str]: - errors = [] - for expected_section in self.expected_sections: - if expected_section.lower() not in doc_lines: - errors.append(f"Connector documentation is missing a '{expected_section.replace('#', '').strip()}' section") - return errors - - def _run(self, connector: Connector) -> CheckResult: - if not connector.documentation_file_path or not connector.documentation_file_path.exists(): - return self.fail( - connector=connector, - message="Could not check documentation structure as the documentation file is missing.", - ) - - doc_lines = [line.lower() for line in connector.documentation_file_path.read_text().splitlines()] - - if not doc_lines: - return self.fail( - connector=connector, - message="Documentation file is empty", - ) - - errors = [] - errors.extend(self.check_main_header(connector, doc_lines)) - errors.extend(self.check_sections(doc_lines)) - - if errors: - return self.fail( - connector=connector, - message=f"Connector documentation does not follow the guidelines: {'. '.join(errors)}", - ) - return self.pass_( - connector=connector, - message="Documentation guidelines are followed", - ) - - -class CheckChangelogEntry(DocumentationCheck): - name = "Connectors must have a changelog entry for each version" - description = "Each new version of a connector must have a changelog entry defined in the user facing documentation in `./docs/integrations/s/.md`." - - def _run(self, connector: Connector) -> CheckResult: - if connector.documentation_file_path is None or not connector.documentation_file_path.exists(): - return self.fail( - connector=connector, - message="Could not check changelog entry as the documentation file is missing. 
Please create it.", - ) - - doc_lines = connector.documentation_file_path.read_text().splitlines() - if not doc_lines: - return self.fail( - connector=connector, - message="Documentation file is empty", - ) - - after_changelog = False - entry_found = False - for line in doc_lines: - if "# changelog" in line.lower(): - after_changelog = True - if after_changelog and connector.version in line: - entry_found = True - - if not after_changelog: - return self.fail( - connector=connector, - message="Connector documentation is missing a 'Changelog' section", - ) - if not entry_found: - return self.fail( - connector=connector, - message=f"Connectors must have a changelog entry for each version: changelog entry for version {connector.version} is missing in the documentation", - ) - - return self.pass_(connector=connector, message=f"Changelog entry found for version {connector.version}") - - -ENABLED_CHECKS = [ - CheckMigrationGuide(), - CheckDocumentationExists(), - # CheckDocumentationStructure(), # Disabled as many are failing - we either need a big push or to block everyone. See https://github.com/airbytehq/airbyte/commit/4889e6e024d64ba0e353611f8fe67497b02de190#diff-3c73c6521bf819248b3d3d8aeab7cacfa4e8011f9890da93c77da925ece7eb20L262 - CheckChangelogEntry(), -] diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/__init__.py b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/__init__.py new file mode 100644 index 0000000000000..4c3f31b6ccde0 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
from connectors_qa.checks.documentation.documentation import (
    CheckChangelogEntry,
    CheckChangelogSectionContent,
    CheckDocumentationExists,
    CheckDocumentationHeadersOrder,
    CheckDocumentationLinks,
    CheckForAirbyteCloudSectionContent,
    CheckForAirbyteOpenSectionContent,
    CheckMigrationGuide,
    CheckPrerequisitesSectionDescribesRequiredFieldsFromSpec,
    CheckSourceSectionContent,
    CheckSupportedSyncModesSectionContent,
    CheckTutorialsSectionContent,
)

# One instance of each documentation check enabled for this package.
# NOTE(review): the list order presumably drives execution/report order — confirm with the consumer of ENABLED_CHECKS.
ENABLED_CHECKS = [
    CheckMigrationGuide(),
    CheckDocumentationExists(),
    CheckDocumentationLinks(),
    CheckDocumentationHeadersOrder(),
    CheckPrerequisitesSectionDescribesRequiredFieldsFromSpec(),
    CheckSourceSectionContent(),
    CheckForAirbyteCloudSectionContent(),
    CheckForAirbyteOpenSectionContent(),
    CheckSupportedSyncModesSectionContent(),
    CheckTutorialsSectionContent(),
    CheckChangelogSectionContent(),
    CheckChangelogEntry(),
]
class CheckMigrationGuide(DocumentationCheck):
    """Check that a connector declaring breaking changes ships a correctly structured migration guide."""

    name = "Breaking changes must be accompanied by a migration guide"
    description = "When a breaking change is introduced, we check that a migration guide is available. It should be stored under `./docs/integrations/s/-migrations.md`.\nThis document should contain a section for each breaking change, in order of the version descending. It must explain users which action to take to migrate to the new version."

    @staticmethod
    def _version_sort_key(version: str) -> tuple:
        """
        Build a sort key that orders dotted versions numerically instead of alphabetically.

        Each dot-separated part becomes `(leading integer, remaining text)`, so "10.0.0" sorts after "9.0.0".
        Parts without a leading integer sort before numeric ones. This is not a full semver comparison.
        """
        key = []
        for part in str(version).split("."):
            digits_len = len(part) - len(part.lstrip("0123456789"))
            number = int(part[:digits_len]) if digits_len else -1
            key.append((number, part[digits_len:]))
        return tuple(key)

    def _run(self, connector: Connector) -> CheckResult:
        """
        Validate the migration guide of `connector` against the breaking changes declared in its metadata.

        The check passes when no breaking change is declared. Otherwise the guide must exist, start with
        `# <connector name> Migration Guide`, and contain exactly one `## Upgrading to <version>` heading
        per breaking change, ordered from the highest version to the lowest.
        """
        breaking_changes = get(connector.metadata, "releases.breakingChanges")
        if not breaking_changes:
            return self.create_check_result(
                connector=connector,
                passed=True,
                message="No breaking changes found. A migration guide is not required",
            )

        migration_guide_file_path = connector.migration_guide_file_path
        if migration_guide_file_path is None or not migration_guide_file_path.exists():
            return self.create_check_result(
                connector=connector,
                passed=False,
                message=f"Migration guide file is missing for {connector.technical_name}. Please create a migration guide in ./docs/integrations/s/-migrations.md`",
            )

        expected_title = f"# {connector.name_from_metadata} Migration Guide"
        expected_version_header_start = "## Upgrading to "
        guide_lines = migration_guide_file_path.read_text().splitlines()
        # An empty guide has no line at all: compare against an empty first line.
        first_line = guide_lines[0] if guide_lines else ""
        if first_line != expected_title:
            return self.create_check_result(
                connector=connector,
                passed=False,
                message=f"Migration guide file for {connector.technical_name} does not start with the correct header. Expected '{expected_title}', got '{first_line}'",
            )

        # breakingChanges is a dict keyed by version. The guide must contain one "## Upgrading to {version}"
        # heading per key, highest version first (3.0.0, 2.0.0, 1.0.0, ...).
        # Versions are compared numerically: a plain string sort would put "10.0.0" before "9.0.0".
        ordered_breaking_changes = sorted(breaking_changes.keys(), key=self._version_sort_key, reverse=True)
        ordered_expected_headings = [f"{expected_version_header_start}{version}" for version in ordered_breaking_changes]

        ordered_heading_versions = [
            stripped_line.removeprefix(expected_version_header_start)
            for stripped_line in (line.strip() for line in guide_lines)
            if stripped_line.startswith(expected_version_header_start)
        ]

        if ordered_breaking_changes != ordered_heading_versions:
            return self.create_check_result(
                connector=connector,
                passed=False,
                message=textwrap.dedent(
                    f"""
                    Migration guide file for {connector.name_from_metadata} has incorrect version headings.
                    Check for missing, extra, or misordered headings, or headers with typos.
                    Expected headings: {ordered_expected_headings}
                    """
                ),
            )
        return self.create_check_result(
            connector=connector,
            passed=True,
            message="The migration guide is correctly templated",
        )
def validate_links(self, connector: Connector) -> List[str]:
    """
    Request every link of the connector documentation concurrently and report the ones answering 404.

    Links whose host is unreachable, too slow, or has a broken TLS setup are skipped rather than reported:
    only an explicit 404 answer marks a link as invalid.

    Returns:
        One message per link that returned a 404 status code.
    """
    request_timeout_seconds = 30
    errors: List[str] = []

    def request_link(docs_link: str) -> None:
        try:
            # Without a timeout a single unresponsive host would keep its worker thread alive forever.
            response = requests.get(docs_link, timeout=request_timeout_seconds)
        except (requests.exceptions.SSLError, requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            return
        if response.status_code == 404:
            errors.append(f"{docs_link} with {response.status_code} status code")

    workers = []
    for link in DocumentationContent(connector=connector).links:
        # Daemon threads cannot block interpreter shutdown if one outlives the join below.
        worker = Thread(target=request_link, args=[link], daemon=True)
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join(timeout=request_timeout_seconds)  # links still pending after the timeout are skipped

    # Return a snapshot so a straggling worker cannot mutate the list while the caller formats it.
    return list(errors)
def check_headers(self, connector: Connector) -> List[str]:
    """
    Check that the headers of the connector documentation follow the naming and order of the standard template.

    The expected headers come from the template connectors_qa/checks/documentation/templates/template.md.j2,
    which is based on https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw.
    Actual and expected headers are walked in parallel with one index each:
      - the CONNECTOR_SPECIFIC_HEADINGS placeholder in the template absorbs any number of actual headers
        that are not template headers;
      - an actual header equal to the expected one advances both indexes;
      - an expected header listed in get_not_required_headers() may be absent and is skipped;
      - anything else is a mismatch and stops the check.

    Returns:
        An empty list when the headers follow the template, otherwise a list holding a single message
        (the first mismatch found, or the template headers still expected once the actual headers ran out).
        Actual headers left over after the template is exhausted are not reported.
    """
    errors = []

    actual_headers = prepare_headers(DocumentationContent(connector=connector).headers)
    expected_headers = TemplateContent(connector.name_from_metadata).headers
    not_required_headers = self.get_not_required_headers(connector.name_from_metadata)

    actual_header_len, expected_len = len(actual_headers), len(expected_headers)
    actual_header_index, expected_header_index = 0, 0

    while actual_header_index < actual_header_len and expected_header_index < expected_len:
        actual_header = actual_headers[actual_header_index]
        expected_header = expected_headers[expected_header_index]
        # the template expects connector specific headers here: the actual header is not validated by name
        if expected_header == self.CONNECTOR_SPECIFIC_HEADINGS:
            # a header unknown to the template is connector specific: consume it and stay on the placeholder,
            # as there can be more than one connector specific header
            if actual_header not in expected_headers:
                actual_header_index += 1
                continue
            else:
                # the actual header is a template header: leave the placeholder so its position gets validated
                expected_header_index += 1
                continue
        # strict match: the actual header is the expected one, move both indexes forward
        if actual_header == expected_header:
            actual_header_index += 1
            expected_header_index += 1
            continue
        # no match, but the expected header is optional: skip it and retry the same actual header
        if expected_header in not_required_headers:
            expected_header_index += 1
            continue
        # none of the rules above applied, so neither index moved: report the mismatch and stop
        errors.append(reason_titles_not_match(actual_header, expected_header, expected_headers))
        return errors
    # the actual headers ran out before the end of the template: report the headers still expected
    if expected_header_index != expected_len:
        errors.append(reason_missing_titles(expected_header_index, expected_headers, not_required_headers))
        return errors

    return errors
def check_prerequisites(self, connector: Connector) -> List[str]:
    """
    List the required spec fields that the `Prerequisites` section of the documentation does not mention.

    The spec is read statically through `connector.connector_spec_file_content`; when it is unavailable or
    has no connection specification there is nothing to compare against and no error is reported.
    Field titles are searched case-insensitively in the section text. Credentials are considered described
    as soon as one of CREDENTIALS_KEYWORDS appears in the section.

    Returns:
        A single message when the section is missing or duplicated, otherwise the missing field titles
        (plus "credentials" when required credentials are not described). Empty when everything is described.
    """
    actual_connector_spec = connector.connector_spec_file_content
    if not actual_connector_spec:
        return []

    documentation = DocumentationContent(connector=connector)
    if self.PREREQUISITES not in documentation.headers:
        return [f"Documentation does not have {self.PREREQUISITES} section."]

    actual_contents = documentation.section(self.PREREQUISITES)
    if len(actual_contents) > 1:
        return [f"Documentation has more than one {self.PREREQUISITES} section. Please check it."]

    spec = actual_connector_spec.get("connectionSpecification") or actual_connector_spec.get("connection_specification")
    if not spec:
        # Neither spelling of the connection specification is present: no required field can be derived.
        return []

    # A header without any extracted content is treated as an empty section, so every required field is reported.
    section_content = actual_contents[0].lower() if actual_contents else ""

    required_titles, has_credentials = required_titles_from_spec(spec)  # type: ignore

    missing_fields: List[str] = [title for title in required_titles if title.lower() not in section_content]

    # Credentials are described in many different ways, so they are only looked up through keywords.
    if has_credentials and not any(keyword in section_content for keyword in self.CREDENTIALS_KEYWORDS):
        missing_fields.append("credentials")

    return missing_fields
class CheckSection(CheckDocumentationContent):
    """Base check comparing one documentation section, identified by `header`, with the standard template."""

    # Whether a documentation without the section fails the check.
    required = True
    # Which occurrence of the section in the template is used as the expected content.
    expected_section_index = 0

    @property
    def name(self) -> str:
        return f"'{self.header}' section of the documentation follows our guidelines"

    @property
    def description(self) -> str:
        template_sections = TemplateContent("CONNECTOR_NAME_FROM_METADATA").section(self.header)
        # Prefer the second occurrence of the section when the template has several.
        template = template_sections[1] if len(template_sections) > 1 else template_sections[0]
        context = {"header": self.header, "template": template}
        return generate_description("section_content_description.md.j2", context)

    @property
    @abc.abstractmethod
    def header(self) -> str:
        """The name of header for validating content"""

    def check_section(self, connector: Connector) -> List[str]:
        """Return the differences between the documented section and the template, empty when they match."""
        documentation = DocumentationContent(connector=connector)

        if self.header not in documentation.headers:
            return [f"Documentation does not have {self.header} section."] if self.required else []

        expected = TemplateContent(connector.name_from_metadata).section(self.header)[self.expected_section_index]
        # Only the beginning of each documented section, up to the template length, is compared.
        limit = len(expected)
        candidates = [content[:limit] for content in documentation.section(self.header)]

        matches = get_close_matches(expected, candidates)
        if not matches:
            return [f"Please review your {self.header} section, unable to find the expected content:\n{expected}"]

        closest = matches[0]
        if closest == expected:
            return []
        return list(ndiff(closest.splitlines(keepends=True), expected.splitlines(keepends=True)))

    def _run(self, connector: Connector) -> CheckResult:
        """Fail on a missing or empty documentation file, otherwise report the outcome of `check_section`."""
        doc_path = connector.documentation_file_path
        if not doc_path or not doc_path.exists():
            return self.fail(
                connector=connector,
                message="Could not check documentation structure as the documentation file is missing.",
            )

        if not connector.documentation_file_path.read_text():
            return self.fail(
                connector=connector,
                message="Documentation file is empty",
            )

        differences = self.check_section(connector)
        if not differences:
            return self.pass_(
                connector=connector,
                message="Documentation guidelines are followed",
            )
        return self.fail(
            connector=connector,
            message=f"Connector {self.header} section content does not follow standard template:\n{''.join(differences)}",
        )
> 1: + return [f"Expected only one header {header}. Please rename duplicate."] + + actual = replace_connector_specific_urls_from_section(actual_contents[0]) + # actual connector doc can have imports etc. in this section + if expected not in actual: + errors = list(ndiff(actual.splitlines(keepends=True), expected.splitlines(keepends=True))) + + return errors + + def _run(self, connector: Connector) -> CheckResult: + if not connector.documentation_file_path or not connector.documentation_file_path.exists(): + return self.fail( + connector=connector, + message="Could not check documentation structure as the documentation file is missing.", + ) + + if not connector.documentation_file_path.read_text(): + return self.fail( + connector=connector, + message="Documentation file is empty", + ) + + errors = self.check_source_follows_template(connector) + + if errors: + return self.fail( + connector=connector, + message=f"Connector {connector.name_from_metadata} section content does not follow standard template:{''.join(errors)}", + ) + return self.pass_( + connector=connector, + message="Documentation guidelines are followed", + ) + + +class CheckForAirbyteCloudSectionContent(CheckSection): + header = "For Airbyte Cloud:" + expected_section_index = 1 + + +class CheckForAirbyteOpenSectionContent(CheckSection): + header = "For Airbyte Open Source:" + expected_section_index = 1 + + +class CheckSupportedSyncModesSectionContent(CheckSection): + header = "Supported sync modes" + + +class CheckTutorialsSectionContent(CheckSection): + header = "Tutorials" + required = False + + +class CheckChangelogSectionContent(CheckSection): + header = "Changelog" + + def check_section(self, connector: Connector) -> List[str]: + documentation = DocumentationContent(connector=connector) + + if self.header not in documentation.headers: + if self.required: + return [f"Documentation does not have {self.header} section."] + return [] + + errors = [] + + expected = 
TemplateContent(connector.name_from_metadata).section(self.header)[self.expected_section_index] + actual_contents = documentation.section(self.header) + + if len(actual_contents) > 1: + return [f"Documentation has more than one {self.header} section. Please check it."] + + actual = prepare_changelog_to_compare(actual_contents[0])[: len(expected)] + if actual != expected: + errors = list(ndiff(actual.splitlines(keepends=True), expected.splitlines(keepends=True))) + + return errors + + +class CheckChangelogEntry(DocumentationCheck): + name = "Connectors must have a changelog entry for each version" + description = "Each new version of a connector must have a changelog entry defined in the user facing documentation in `./docs/integrations/s/.md`." + + def _run(self, connector: Connector) -> CheckResult: + if connector.documentation_file_path is None or not connector.documentation_file_path.exists(): + return self.fail( + connector=connector, + message="Could not check changelog entry as the documentation file is missing. 
Please create it.", + ) + + doc_lines = connector.documentation_file_path.read_text().splitlines() + if not doc_lines: + return self.fail( + connector=connector, + message="Documentation file is empty", + ) + + after_changelog = False + entry_found = False + for line in doc_lines: + if "# changelog" in line.lower(): + after_changelog = True + if after_changelog and connector.version in line: + entry_found = True + + if not after_changelog: + return self.fail( + connector=connector, + message="Connector documentation is missing a 'Changelog' section", + ) + if not entry_found: + return self.fail( + connector=connector, + message=f"Connectors must have a changelog entry for each version: changelog entry for version {connector.version} is missing in the documentation", + ) + + return self.pass_(connector=connector, message=f"Changelog entry found for version {connector.version}") diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/helpers.py b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/helpers.py new file mode 100644 index 0000000000000..1c947d0313447 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/helpers.py @@ -0,0 +1,152 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
def remove_step_from_heading(heading: str) -> str:
    """Strip the ``Step 1: `` or ``Step 2: `` marker from a heading.

    The markers are checked in that order and only the first one found is
    removed (every occurrence of it); a heading without either marker is
    returned unchanged.
    """
    for marker in ("Step 1: ", "Step 2: "):
        if marker in heading:
            return heading.replace(marker, "")
    return heading


def required_titles_from_spec(spec: dict[str, Any]) -> tuple[list[str], bool]:
    """Return the lower-cased titles of required, non-credential spec fields.

    Args:
        spec: Mapping with an optional ``required`` list and a ``properties``
            mapping whose entries carry a ``title``.

    Returns:
        ``(titles, has_credentials)``. ``has_credentials`` is True when any
        credential-like field name appears in ``required`` or in
        ``properties``. ``([], False)`` is returned when ``required`` is
        missing or empty.

    Raises:
        KeyError: if ``properties`` is missing, or a required non-credential
            field has no ``title``.
    """
    spec_required = spec.get("required")
    if not spec_required:
        return [], False

    spec_properties = spec["properties"]
    creds = ["credentials", "client_id", "client_secret", "access_token", "refresh_token", "authorization"]
    has_credentials = any(cred in spec_required or cred in spec_properties for cred in creds)

    # Filter into a new list: removing entries from ``spec_required`` in place
    # would silently edit the caller's spec and change later calls' results.
    non_credential_fields = [field for field in spec_required if field not in creds]

    titles = [spec_properties[field]["title"].lower() for field in non_credential_fields]
    return titles, has_credentials


def documentation_node(connector_documentation: str) -> "SyntaxTreeNode":
    """Parse markdown text with the ``commonmark`` preset into a syntax tree."""
    md = MarkdownIt("commonmark")
    tokens = md.parse(connector_documentation)
    return SyntaxTreeNode(tokens)


def header_name(n: "SyntaxTreeNode") -> str:
    """Return the text of a heading node.

    Reads the content of the first child of the node's second token --
    presumably the inline token between heading_open and heading_close;
    verify for headings that start with inline markup.
    """
    return n.to_tokens()[1].children[0].content  # type: ignore


def replace_connector_specific_urls_from_section(content: str) -> str:
    """Replace every http(s) URL in ``content`` with the ``{docs_link}`` placeholder.

    A URL runs up to the next whitespace character or closing parenthesis.
    """
    link_to_replace = "{docs_link}"
    # Substitute each match where it occurs. Replacing collected link strings
    # with ``str.replace`` would mangle a URL that merely starts with an
    # earlier, shorter one (leaving "{docs_link}/rest/of/path").
    return re.sub(r"https?://[^\s)]+", link_to_replace, content)


def remove_not_required_step_headers(headers: list[str]) -> list[str]:
    """Drop connector-specific step headers that follow the ``Step 1: `` header.

    Headers such as "Step 1.1: ...", "Step 2.3: ..." or "Step 3: ..." are
    connector specific, so they are ignored; "Step 1: " and "Step 2: " stay.

    From: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1",
          "Step 1.11: do something 11", "Step 2: do something 2",
          "Step 2.1: do something 2.1", "Changelog"
    To:   "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1",
          "Step 2: do something 2", "Changelog"

    The input list is not modified; it is returned as-is when no header
    contains "Step 1: ".
    """
    step_one_index = None
    for header in headers:
        if "Step 1: " in header:
            step_one_index = headers.index(header)
    # Compare with None explicitly: index 0 is a valid position and must not
    # be mistaken for "no Step 1 header".
    if step_one_index is None:  # docs doesn't have Step 1 headers
        return headers

    step_headers = headers[step_one_index:]
    pattern = r"Step \d+.?\d*: "
    step = "Step 1: "
    i = 0
    # Keep going while the current header is the required step and the next
    # header is also a "Step x: " header.
    while i + 1 < len(step_headers) and step in step_headers[i] and re.match(pattern, step_headers[i + 1]):
        if "Step 2: " in step_headers[i + 1]:
            # Found Step 2: it is a required header, continue from it.
            step = "Step 2: "
            i += 1
        else:
            # Delete by position so an earlier header with identical text is
            # never removed in place of the one being inspected.
            del step_headers[i + 1]

    return headers[:step_one_index] + step_headers


def reason_titles_not_match(heading_names_value: str, template_headings_value: str, template_headings: list[str]) -> str:
    """Build the message for a heading that does not match the template.

    When ``difflib.get_close_matches`` finds candidates among
    ``template_headings`` and the best one differs from the actual heading,
    the candidates are included as a suggestion.
    """
    reason = f"Heading '{heading_names_value}' is not in the right place, the name of heading is incorrect or not expected.\n"
    close_titles = get_close_matches(heading_names_value, template_headings)
    if close_titles and close_titles[0] != heading_names_value:
        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Possible correct heading: '{close_titles}'. Expected Heading: '{template_headings_value}'"
    else:
        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Expected Heading: '{template_headings_value}'"
    return reason + diff


def reason_missing_titles(template_headings_index: int, template_headings: list[str], not_required_headers: list[str]) -> str:
    """Describe the template headings missing from ``template_headings_index`` onwards.

    The message lists the missing headings that are not in
    ``not_required_headers`` first, then all missing headings.
    """
    missing = template_headings[template_headings_index:]
    required = [m for m in missing if m not in not_required_headers]
    return f"Required missing headers: {required}. All missing headers: {missing}"


def description_end_line_index(heading: str, actual_headings: tuple[str, ...], header_line_map: dict[str, int]) -> int:
    """Return the mapped line of the heading after ``heading``, minus one.

    Returns None when ``heading`` is the last entry of ``actual_headings``.

    Raises:
        ValueError: if ``heading`` is not in ``actual_headings``.
    """
    next_position = actual_headings.index(heading) + 1
    if next_position == len(actual_headings):
        return None  # type: ignore
    return header_line_map[actual_headings[next_position]] - 1


def prepare_headers(headers: list[str]) -> list[str]:
    """Normalize headers: drop optional step headers, then strip the Step 1/2 markers."""
    headers = remove_not_required_step_headers(headers)  # remove Step 1.1 Step 3 ... headers
    headers = [remove_step_from_heading(h) for h in headers]  # remove Step 1 and Step 2 from header name
    return headers


def prepare_changelog_to_compare(docs: str) -> str:
    """Flatten the parsed changelog section into one string for comparison.

    The right-stripped content of every node in ``node[0].siblings`` is
    concatenated. Multi-line content is split on newlines and each line is
    re-terminated with a newline; single-line content is appended as is.
    Assumes ``docs`` parses to at least one node.
    """
    newline = "\n"
    node = documentation_node(docs)
    pieces: list[str] = []

    for sibling in node[0].siblings:
        text = sibling.content.rstrip()
        if newline in text:
            pieces.extend(line + newline for line in text.split(newline))
        else:
            pieces.append(text)

    return "".join(pieces)


def generate_description(template_file: str, kwargs: dict[str, Any]) -> str:
    """Render ``template_file`` from the ``templates/`` directory next to this module.

    Args:
        template_file: Template file name, resolved by the Jinja file-system loader.
        kwargs: Variables passed to the template as keyword arguments.
    """
    environment = Environment(loader=FileSystemLoader(Path(__file__).parent / "templates/"))
    template = environment.get_template(template_file)
    template_content = template.render(**kwargs)
    return template_content
b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/models.py @@ -0,0 +1,164 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +import re +from pathlib import Path +from typing import Any, Dict, List + +from connector_ops.utils import Connector # type: ignore +from jinja2 import Environment, FileSystemLoader +from markdown_it.tree import SyntaxTreeNode + +from .helpers import documentation_node, header_name, remove_step_from_heading + + +class SectionLines: + def __init__(self, start: int, end: int): + self.start = start + self.end = end + + def __repr__(self) -> str: + return f"{self.start} - {self.end}" + + +class SectionContent: + def __init__(self, header: str): + self.header = header + self._content: List[str] = [] + + @property + def content(self) -> List[str]: + return self._content + + @content.setter + def content(self, content: str) -> None: + self._content.append(content) + + def __repr__(self) -> str: + return f"{self.header}: {self.content}" + + +class Content: + HEADING = "heading" + supported_header_levels = ["h1", "h2", "h3", "h4"] + + def __init__(self) -> None: + self.content = self._content() + self.node = self._node() + self.header_line_map = self._header_line_map() + self.headers = self._headers() + self.sections = self._sections() + + def _content(self) -> str: # type: ignore + pass + + def _sections(self) -> list[SectionContent]: # type: ignore + pass + + def _node(self) -> SyntaxTreeNode: + node = documentation_node(self.content) + return node + + def _headers(self) -> list[str]: + headers = [] + for n in self.node: # type: ignore + if n.type == self.HEADING and n.tag in self.supported_header_levels: + headers.append(remove_step_from_heading(header_name(n))) + + return headers + + def _header_line_map(self) -> Dict[str, list[SectionLines]]: + headers = [] + starts = [] + header_line_map: Dict[str, list[SectionLines]] = {} + + for n in self.node: # type: ignore + if n.type == self.HEADING: + 
headers.append(header_name(n)) + starts.append(n.map[1]) + + i = 0 + while len(headers) > i: + header = headers[i] + start_index = i + end_index = starts[start_index + 1] - 1 if start_index + 1 < len(headers) else None + if header not in header_line_map.keys(): + header_line_map[header] = [SectionLines(start=starts[start_index], end=end_index)] + else: + header_line_map[header] = header_line_map[header] + [SectionLines(start=starts[start_index], end=end_index)] + i += 1 + + return header_line_map + + def section(self, header) -> list[str]: # type: ignore + for s in self.sections: + if s.header == header: + return s.content + + +class DocumentationContent(Content): + def __init__(self, connector: Connector): + self.connector = connector + super().__init__() + self.links = self._links() + + def _content(self) -> str: + return self.connector.documentation_file_path.read_text().rstrip() + + def _links(self) -> list[str]: + return re.findall("(? list[SectionContent]: + sections_list = [] + + with open(self.connector.documentation_file_path) as docs_file: + doc_lines = docs_file.readlines() + + for key, value in self.header_line_map.items(): + section = SectionContent(header=key) + sections_list.append(section) + + for lines in value: + section_content = "".join(doc_lines[lines.start : lines.end]) + section.content = section_content # type: ignore + + return sections_list + + +class TemplateContent(Content): + template_file = "template.md.j2" + template_folder = Path(__file__).parent / "templates/" + + def __init__(self, connector_name: str): + self.connector_name = connector_name + super().__init__() + self.sections = self._sections() + + def _content(self) -> str: + environment = Environment(loader=FileSystemLoader(self.template_folder)) + template = environment.get_template(self.template_file) + template_content = template.render(connector_name=self.connector_name) + return template_content + + def _sections(self) -> list[SectionContent]: + template_lines = 
self.content.splitlines(keepends=True) + + sections_list = [] + + for key, value in self.header_line_map.items(): + section = SectionContent(header=key) + sections_list.append(section) + + for lines in value: + section_content = "".join(template_lines[lines.start : lines.end]) + section.content = section_content # type: ignore + + return sections_list + + def headers_with_tag(self) -> list[str]: + headers = [] + for n in self.node: # type: ignore + if n.type == self.HEADING and n.tag in self.supported_header_levels: + header = "#" * int(n.tag.replace("h", "")) + " " + remove_step_from_heading(header_name(n)) + headers.append(header) + + return headers diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/documentation_headers_check_description.md.j2 b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/documentation_headers_check_description.md.j2 new file mode 100644 index 0000000000000..d2bfcabd267d5 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/documentation_headers_check_description.md.j2 @@ -0,0 +1,17 @@ +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). 
+ +This check expects the following order of headers in the documentation: + +```` + +{% for i in ordered_headers %} + {{ i }} +{% endfor %} + +```` + + +List of optional headers, which may be missing from the documentation; the strict check is skipped for them: +{% for i in not_required_headers %} + - {{ i }} +{% endfor %} diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/section_content_description.md.j2 b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/section_content_description.md.j2 new file mode 100644 index 0000000000000..f94ebf0e4c892 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/section_content_description.md.j2 @@ -0,0 +1,7 @@ +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that {{ header }} header section content follows standard template: + +```` +{{ template }} +```` \ No newline at end of file diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2 b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2 new file mode 100644 index 0000000000000..3d74b02442a0a --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2 @@ -0,0 +1,73 @@ +# {{ connector_name }} + + + +This page contains the setup guide and reference information for the [{{ connector_name }}]({docs_link}) source connector. 
+ + + +## Prerequisites + +## Setup guide + +## Set up {{ connector_name }} + +Describes actions that should be done on source side before set up in Airbyte + +### For Airbyte Cloud: + +Describes specific for airbyte cloud set up steps of the connector + +### For Airbyte Open Source: + +Describes specific for airbyte oss set up steps of the connector + +### CONNECTOR_SPECIFIC_FEATURES + +Heading for set up steps which are specific for connector and will be ignored while check. + +## Set up the {{ connector_name }} connector in Airbyte + +Describes how to set up the connector in Airbyte. + +### For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source. +3. On the Set up the source page, select {{ connector_name }} from the Source type dropdown. +4. Enter a name for the {{ connector_name }} connector. +### For Airbyte Open Source: + +1. Navigate to the Airbyte Open Source dashboard. +## CONNECTOR_SPECIFIC_FEATURES + +Heading for set up steps in Airbyte which are specific for connector and will be ignored while check. + +## Supported sync modes + +The {{ connector_name }} source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +## Supported Streams + +## CONNECTOR_SPECIFIC_FEATURES + +Other related to the connector features, e.g. Include deleted, Supported objects etc. + +### Performance considerations + +## Data type map + +## Limitations & Troubleshooting + +### CONNECTOR_SPECIFIC_FEATURES + +Specific Limitations & Troubleshooting related to the connector. + +### Tutorials + +Now that you have set up the {{ connector_name }} source connector, check out the following {{ connector_name }} tutorials: + +## Changelog +
+ Expand to review +
diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/models.py b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/models.py index 1467292966176..5d6eb29fe615b 100644 --- a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/models.py +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/models.py @@ -123,6 +123,15 @@ def applies_to_connector_types(self) -> List[str]: """ return ALL_TYPES + @property + def applies_to_connector_ab_internal_sl(self) -> int: + """The connector ab_internal_s that the QA check applies to + + Returns: + int: integer value for connector ab_internal_sl level + """ + return 0 + @property @abstractmethod def category(self) -> CheckCategory: @@ -187,6 +196,11 @@ def run(self, connector: Connector) -> CheckResult: connector, f"Check does not apply to {connector.cloud_usage} connectors", ) + if connector.ab_internal_sl < self.applies_to_connector_ab_internal_sl: + return self.skip( + connector, + f"Check does not apply to connectors with sl < {self.applies_to_connector_ab_internal_sl}", + ) return self._run(connector) def _run(self, connector: Connector) -> CheckResult: diff --git a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/templates/qa_checks.md.j2 b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/templates/qa_checks.md.j2 index 7c3849cff988e..d22df4dc7a9af 100644 --- a/airbyte-ci/connectors/connectors_qa/src/connectors_qa/templates/qa_checks.md.j2 +++ b/airbyte-ci/connectors/connectors_qa/src/connectors_qa/templates/qa_checks.md.j2 @@ -13,6 +13,7 @@ They are by no mean replacing the need for a manual review of the connector code _Applies to the following connector types: {{ ', '.join(check.applies_to_connector_types) }}_ _Applies to the following connector languages: {{ ', '.join(check.applies_to_connector_languages) }}_ _Applies to connector with {{ ', '.join(check.applies_to_connector_support_levels) if check.applies_to_connector_support_levels else 'any' }} support level_ +_Applies to connector 
with {{ check.applies_to_connector_ab_internal_sl if check.applies_to_connector_ab_internal_sl else 'any' }} internal support level_ _Applies to connector with {{ ', '.join(check.applies_to_connector_cloud_usage) if check.applies_to_connector_cloud_usage else 'any' }} Airbyte usage level_ {{ check.description }} diff --git a/airbyte-ci/connectors/connectors_qa/tests/conftest.py b/airbyte-ci/connectors/connectors_qa/tests/conftest.py index 31956fc0dfa6f..0b98b478ed87e 100644 --- a/airbyte-ci/connectors/connectors_qa/tests/conftest.py +++ b/airbyte-ci/connectors/connectors_qa/tests/conftest.py @@ -2,6 +2,7 @@ import random import string +from pathlib import Path import pytest @@ -9,3 +10,42 @@ @pytest.fixture def random_string(): return "".join(random.choices(string.ascii_uppercase + string.digits, k=10)) + + +def connector(tmp_path, mocker, data_file): + documentation_file_path = tmp_path / "documentation.md" + + connector = mocker.Mock( + technical_name="test-connector", + version="1.0.0", + documentation_file_path=documentation_file_path, + name="GitHub", + ab_internal_sl=300, + language="python", + connector_type="source", + metadata={"name": "GitHub"}, + name_from_metadata="GitHub", + connector_spec_file_content={ + "connectionSpecification": {"required": ["repos"], "properties": {"repos": {"title": "GitHub Repositories"}}} + }, + ) + with open(Path(__file__).parent / f"unit_tests/test_checks/data/docs/{data_file}.md", "r") as f: + data = f.read().rstrip() + connector.documentation_file_path.write_text(data) + + return connector + + +@pytest.fixture +def connector_with_invalid_links_in_documentation(tmp_path, mocker): + return connector(tmp_path, mocker, "invalid_links") + + +@pytest.fixture +def connector_with_invalid_documentation(tmp_path, mocker): + return connector(tmp_path, mocker, "incorrect_not_all_structure") + + +@pytest.fixture +def connector_with_correct_documentation(tmp_path, mocker): + return connector(tmp_path, mocker, "correct") diff --git 
a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/correct.md b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/correct.md new file mode 100644 index 0000000000000..ce519caa5cb80 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/correct.md @@ -0,0 +1,324 @@ +# GitHub + + + +This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector. + + + +## Prerequisites + +- List of GitHub Repositories (and access for them in case they are private) + + +**For Airbyte Cloud:** + +- OAuth +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + + +**For Airbyte Open Source:** + +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + +## Setup guide + +### Step 1: Set up GitHub + +Create a [GitHub Account](https://github.com). + + +**Airbyte Open Source additional setup steps** + +Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +### Step 2: Set up the GitHub connector in Airbyte + + +## For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source. +3. On the Set up the source page, select GitHub from the Source type dropdown. +4. Enter a name for the GitHub connector. +4. Set start date if needed. +5. To authenticate: + + + +### For Airbyte Open Source: + +1. Navigate to the Airbyte Open Source dashboard. +2. Set up start date. + + + - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. 
Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account. + + + + - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`. + +:::caution +Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs. +::: + +7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated. + +- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`. 
+ +- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` + +8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + + + +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +## Supported Streams + +This connector outputs the following full refresh streams: + +- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees) +- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches) +- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity) +- 
[Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators) +- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository) +- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations) +- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request) +- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags) +- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members) +- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user) +- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams) +- [Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members) +- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue) + +This connector outputs the following incremental streams: + +- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository) +- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment) +- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository) +- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits) +- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments) +- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events) +- [Issue comment 
reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment) +- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository) +- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones) +- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue) +- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) +- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards) +- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns) +- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects) +- [ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2) +- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment) +- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest) +- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests) +- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases) +- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories) +- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository) +- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request) +- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers) +- 
[WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run) +- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository) +- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows) + +### Notes + +1. Only 4 streams \(`comments`, `commits`, `issues` and `review comments`\) from the listed above streams are pure incremental meaning that they: + + - read only new records; + - output only new records. + +2. Streams `workflow_runs` and `worflow_jobs` is almost pure incremental: + + - read new records and some portion of old records (in past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs); + - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run); + - output only new records. + +3. Other 19 incremental streams are also incremental but with one difference, they: + + - read all records; + - output only new records. + Please, consider this behaviour when using those 19 incremental streams because it may affect you API call limits. + +4. Sometimes for large streams specifying very distant `start_date` in the past may result in keep on getting error from GitHub instead of records \(respective `WARN` log message will be outputted\). In this case Specifying more recent `start_date` may help. + **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:** + +- `assignees` +- `branches` +- `collaborators` +- `issue_labels` +- `organizations` +- `pull_request_commits` +- `pull_request_stats` +- `repositories` +- `tags` +- `teams` +- `users` + +## Limitations & Troubleshooting + +
+ +Expand to see details about GitHub connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting +The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + +#### Permissions and scopes + +If you use OAuth authentication method, the OAuth2.0 application requests the next list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For [personal access token](https://github.com/settings/tokens) you need to manually select needed scopes. + +Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: + +- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github). +- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced. +- To sync the Projects stream, the repository must have the Projects feature enabled. 
+ +### Troubleshooting + +* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions) + +
+ +### Tutorials + +Now that you have set up the GitHub source connector, check out the following GitHub tutorials: + +See github guidelines here. + +## Changelog + +
+ Expand to review + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exeeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | +| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | +| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | +| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \ | +| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` | +| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. 
| +| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` | +| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | +| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | +| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | +| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | +| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` | +| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances | +| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages | +| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` | +| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter | +| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter | +| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI | +| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side 
throttling of requests | +| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired | +| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. | +| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec | +| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation | +| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config | +| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" | +| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format | +| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form | +| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification | +| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT | +| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream | +| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message | +| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read 
| +| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 | +| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes | +| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | +| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | +| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | +| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | +| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | +| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` | +| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories | +| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" | +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | +| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | +| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | +| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations | +| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" | +| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` | +| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream | +| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying | +| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream | +| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream | +| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching | +| 0.2.33 | 
2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams | +| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages | +| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` | +| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` | +| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` | +| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec | +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | +| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | +| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | +| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta | +| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code | +| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` | +| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` | +| 0.2.19 | 2022-02-07 | 
[10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name | +| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits | +| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time | +| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. | +| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | +| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | +| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub | +| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams | +| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos | +| 0.2.4 | 2021-11-11 | 
[7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas | +| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatibility | +| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add OAuth workflow specification | +| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches | +| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling | +| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams | +| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | +| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | +| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | +| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator | +| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation | +| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams | +| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | +| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix
schema in the `pull_request` stream | +| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub | + +
+ +
\ No newline at end of file diff --git a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/incorrect_not_all_structure.md b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/incorrect_not_all_structure.md new file mode 100644 index 0000000000000..ddd5bd7c4bdd1 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/incorrect_not_all_structure.md @@ -0,0 +1,36 @@ +## GitHub + + + +This page contains the setup guide and reference information for the [GitHub](https://www.github.com). + + + +## Prerequisites + +- Start Date - the start date to replicate your date. + +### For Airbyte Cloud: + +1. [Log into Airbyte](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source. +3. On the Set up the source page, select GitHub from the Source type dropdown. +4. Enter a name for the GitHub connector. + +### For Airbyte Open Source: + +1. Navigate to the Airbyte dashboard. + +## Supported sync modes + +The source supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync): + +### Tutorials + +Now that you have set up the source connector, check out the following tutorials: + +### Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | diff --git a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/invalid_links.md b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/invalid_links.md new file mode 100644 index 0000000000000..c6fb18c1fa911 --- /dev/null +++ b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/data/docs/invalid_links.md @@ -0,0 +1,305 @@ +# GitHub + + + +This page contains the 
setup guide and reference information for the [GitHub](https://www.github.com/invalid-link) source connector. + + + +## Prerequisites + +- List of GitHub Repositories (and access for them in case they are private) + + +**For Airbyte Cloud:** + +- OAuth +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.invalid.com/integrations/sources/github#permissions-and-scopes)) + + + +**For Airbyte Open Source:** + +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.invalid.com/integrations/sources/github#permissions-and-scopes)) + + +## Setup guide + +### Step 1: Set up GitHub + +Create a [GitHub Account](https://github.com/invalid-link). + + +**Airbyte Open Source additional setup steps** + +Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens-that_do_not_exist/invalid-link). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +### Step 2: Set up the GitHub connector in Airbyte + + +**For Airbyte Cloud:** + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.invalid.com/workspaces) account. +2. In the left navigation bar, click **Sources**. +3. On the source selection page, select **GitHub** from the list of Sources. +4. Add a name for your GitHub connector. +5. To authenticate: + + + - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account. + + + + - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com/invalid-link) and then generate a [personal access token](https://github.com/settings/tokens/invalid-links). Enter your GitHub personal access token. 
To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`. + +:::caution +Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs. +::: + +7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated. + +- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`. + +- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` + +8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +9. 
**Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.invalid.com/en/rest/overview/rate-limits-for-the-rest-api). + + + +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +## Supported Streams + +This connector outputs the following full refresh streams: + +- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees) +- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches) +- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity) +- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators) +- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository) +- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations) +- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request) +- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags) +- 
[TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members) +- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user) +- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams) +- [Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members) +- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue) + +This connector outputs the following incremental streams: + +- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository) +- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment) +- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository) +- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits) +- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments) +- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events) +- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment) +- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository) +- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones) +- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue) +- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) +- [Project (Classic) 
cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards) +- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns) +- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects) +- [ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2) +- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment) +- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest) +- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests) +- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases) +- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories) +- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository) +- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request) +- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers) +- [WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run) +- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository) +- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows) + +### Notes + +1. Only 4 streams \(`comments`, `commits`, `issues` and `review comments`\) from the listed above streams are pure incremental meaning that they: + + - read only new records; + - output only new records. + +2. 
Streams `workflow_runs` and `workflow_jobs` are almost pure incremental: + + - read new records and some portion of old records (in past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs); + - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run); + - output only new records. + +3. Other 19 incremental streams are also incremental but with one difference, they: + + - read all records; + - output only new records. + Please, consider this behaviour when using those 19 incremental streams because it may affect your API call limits. + +4. Sometimes for large streams specifying very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help. + **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:** + +- `assignees` +- `branches` +- `collaborators` +- `issue_labels` +- `organizations` +- `pull_request_commits` +- `pull_request_stats` +- `repositories` +- `tags` +- `teams` +- `users` + +## Limitations & Troubleshooting + +
+ +Expand to see details about GitHub connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting +The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + +#### Permissions and scopes + +If you use OAuth authentication method, the OAuth2.0 application requests the next list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For [personal access token](https://github.com/settings/tokens) you need to manually select needed scopes. + +Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: + +- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github). +- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams_do_not_exists) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced. +- To sync the Projects stream, the repository must have the Projects feature enabled. 
+ +### Troubleshooting + +* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions) + +
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exeeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | +| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | +| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | +| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \ | +| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` | +| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. 
| +| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` | +| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | +| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | +| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | +| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | +| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` | +| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances | +| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages | +| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` | +| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter | +| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter | +| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI | +| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side 
throttling of requests | +| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired | +| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. | +| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec | +| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation | +| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config | +| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" | +| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format | +| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form | +| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification | +| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT | +| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream | +| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message | +| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read 
| +| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 | +| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes | +| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | +| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | +| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | +| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | +| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | +| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` | +| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories | +| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" | +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | +| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | +| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | +| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations | +| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" | +| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` | +| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream | +| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying | +| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream | +| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream | +| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching | +| 0.2.33 | 
2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams | +| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages | +| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` | +| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` | +| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` | +| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec | +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | +| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | +| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | +| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta | +| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code | +| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` | +| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` | +| 0.2.19 | 2022-02-07 | 
[10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name | +| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits | +| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time | +| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. | +| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | +| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | +| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub | +| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams | +| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos | +| 0.2.4 | 2021-11-11 | 
[7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas | +| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatability | +| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add oauth worflow specification | +| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches | +| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling | +| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams | +| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | +| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | +| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | +| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator | +| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation | +| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams | +| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | +| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix 
schema in the `pull_request` stream | +| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub | + +
\ No newline at end of file diff --git a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_documentation.py b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_documentation.py index f9aee256d218c..959ba6b48311e 100644 --- a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_documentation.py +++ b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_documentation.py @@ -1,6 +1,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -from connectors_qa.checks import documentation +from connectors_qa.checks.documentation import documentation from connectors_qa.models import CheckStatus @@ -195,13 +195,20 @@ def test_pass_when_documentation_file_path_exists(self, mocker, tmp_path): assert f"User facing documentation file {documentation_file_path} exists" in result.message -class TestCheckDocumentationStructure: +class TestCheckDocumentationContent: + def test_fail_when_documentation_file_path_does_not_exists(self, mocker, tmp_path): # Arrange - connector = mocker.Mock(technical_name="test-connector", documentation_file_path=tmp_path / "not_existing_documentation.md") + connector = mocker.Mock( + technical_name="test-connector", + ab_internal_sl=300, + language="python", + connector_type="source", + documentation_file_path=tmp_path / "not_existing_documentation.md" + ) # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckDocumentationHeadersOrder()._run(connector) # Assert assert result.status == CheckStatus.FAILED @@ -209,10 +216,16 @@ def test_fail_when_documentation_file_path_does_not_exists(self, mocker, tmp_pat def test_fail_when_documentation_file_path_is_none(self, mocker): # Arrange - connector = mocker.Mock(technical_name="test-connector", documentation_file_path=None) + connector = mocker.Mock( + technical_name="test-connector", + ab_internal_sl=300, + language="python", + connector_type="source", + 
documentation_file_path=None + ) # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckDocumentationHeadersOrder()._run(connector) # Assert assert result.status == CheckStatus.FAILED @@ -225,62 +238,220 @@ def test_fail_when_documentation_file_is_empty(self, mocker, tmp_path): connector.documentation_file_path.write_text("") # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckDocumentationHeadersOrder()._run(connector) # Assert assert result.status == CheckStatus.FAILED assert "Documentation file is empty" in result.message - def test_fail_when_documentation_file_does_not_start_with_correct_header(self, mocker, tmp_path): - # Arrange - documentation_file_path = tmp_path / "documentation.md" - connector = mocker.Mock( - technical_name="test-connector", metadata={"name": "Test Connector"}, documentation_file_path=documentation_file_path - ) - connector.documentation_file_path.write_text("# Test Another connector") + def test_fail_when_documentation_file_has_invalid_links(self, connector_with_invalid_links_in_documentation): + # Act + result = documentation.CheckDocumentationLinks()._run(connector_with_invalid_links_in_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector documentation uses invalid links:" in result.message + assert "https://github.com/invalid-link with 404 status code" in result.message + def test_fail_when_documentation_file_has_missing_headers(self, connector_with_invalid_documentation): # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckDocumentationHeadersOrder()._run(connector_with_invalid_documentation) # Assert assert result.status == CheckStatus.FAILED - assert "Connector documentation does not follow the guidelines:" in result.message - assert "The connector name is not used as the main header in the documentation. 
Expected: '# Test Connector'" in result.message + assert "Documentation headers ordering/naming doesn't follow guidelines:" in result.message + assert "Actual Heading: 'For Airbyte Cloud:'. Expected Heading: 'Setup guide'" in result.message - def test_fail_when_documentation_file_has_missing_sections(self, mocker, tmp_path): - # Arrange - documentation_file_path = tmp_path / "documentation.md" - connector = mocker.Mock( - technical_name="test-connector", metadata={"name": "Test Connector"}, documentation_file_path=documentation_file_path + def test_fail_when_documentation_file_not_have_all_required_fields_in_prerequisites_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckPrerequisitesSectionDescribesRequiredFieldsFromSpec()._run( + connector_with_invalid_documentation ) - connector.documentation_file_path.write_text("# Test Connector") + # Assert + assert result.status == CheckStatus.FAILED + assert "Missing descriptions for required spec fields: github repositories" in result.message + + def test_fail_when_documentation_file_has_invalid_source_section_content( + self, + connector_with_invalid_documentation + ): # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckSourceSectionContent()._run(connector_with_invalid_documentation) # Assert assert result.status == CheckStatus.FAILED - assert "Connector documentation does not follow the guidelines:" in result.message - for section in documentation.CheckDocumentationStructure.expected_sections: - assert f"Connector documentation is missing a '{section.replace('#', '').strip()}' section" in result.message + assert "Connector GitHub section content does not follow standard template:" in result.message + assert "+ This page contains the setup guide and reference information for the [GitHub]({docs_link}) source connector." 
in result.message - def test_pass_when_documentation_file_has_correct_structure(self, mocker, tmp_path): - # Arrange - documentation_file_path = tmp_path / "documentation.md" - connector = mocker.Mock( - technical_name="test-connector", metadata={"name": "Test Connector"}, documentation_file_path=documentation_file_path - ) - connector.documentation_file_path.write_text( - "# Test Connector\n## Prerequisites\n## Setup guide\n## Supported sync modes\n## Supported streams\n## Changelog" - ) + def test_fail_when_documentation_file_has_invalid_for_airbyte_cloud_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckForAirbyteCloudSectionContent()._run(connector_with_invalid_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector For Airbyte Cloud: section content does not follow standard template:" in result.message + assert "+ 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account." in result.message + + def test_fail_when_documentation_file_has_invalid_for_airbyte_open_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckForAirbyteOpenSectionContent()._run(connector_with_invalid_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector For Airbyte Open Source: section content does not follow standard template" in result.message + assert "+ 1. Navigate to the Airbyte Open Source dashboard." 
in result.message + + def test_fail_when_documentation_file_has_invalid_supported_sync_modes_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckSupportedSyncModesSectionContent()._run(connector_with_invalid_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector Supported sync modes section content does not follow standard template:" in result.message + assert ("+ The GitHub source connector supports the following" + " [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):") in result.message + + def test_fail_when_documentation_file_has_invalid_tutorials_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckTutorialsSectionContent()._run(connector_with_invalid_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector Tutorials section content does not follow standard template:" in result.message + assert "+ Now that you have set up the GitHub source connector, check out the following GitHub tutorials:" in result.message + + def test_fail_when_documentation_file_has_invalid_changelog_section_content( + self, + connector_with_invalid_documentation + ): + # Act + result = documentation.CheckChangelogSectionContent()._run(connector_with_invalid_documentation) + + # Assert + assert result.status == CheckStatus.FAILED + assert "Connector Changelog section content does not follow standard template:" in result.message + assert "+
\n+ Expand to review\n+
" in result.message + + def test_pass_when_documentation_file_has_correct_headers(self, connector_with_correct_documentation): + # Act + result = documentation.CheckDocumentationHeadersOrder()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert result.message == "Documentation guidelines are followed" + + def test_pass_when_documentation_file_has_correct_prerequisites_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckPrerequisitesSectionDescribesRequiredFieldsFromSpec()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "All required fields from spec are present in the connector documentation" in result.message + + def test_pass_when_documentation_file_has_correct_source_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckSourceSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + + def test_pass_when_documentation_file_has_correct_for_airbyte_cloud_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckForAirbyteCloudSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + + def test_pass_when_documentation_file_has_correct_for_airbyte_open_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckForAirbyteOpenSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + + def test_pass_when_documentation_file_has_correct_supported_sync_modes_section_content( 
+ self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckSupportedSyncModesSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + + def test_pass_when_documentation_file_has_correct_tutorials_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckTutorialsSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + def test_pass_when_documentation_file_has_correct_headers_order( + self, + connector_with_correct_documentation + ): # Act - result = documentation.CheckDocumentationStructure()._run(connector) + result = documentation.CheckDocumentationHeadersOrder()._run(connector_with_correct_documentation) # Assert assert result.status == CheckStatus.PASSED assert "Documentation guidelines are followed" in result.message + def test_pass_when_documentation_file_has_correct_changelog_section_content( + self, + connector_with_correct_documentation + ): + # Act + result = documentation.CheckChangelogSectionContent()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation guidelines are followed" in result.message + + def test_pass_when_all_links_are_valid(self, connector_with_correct_documentation): + # Act + result = documentation.CheckDocumentationLinks()._run(connector_with_correct_documentation) + + # Assert + assert result.status == CheckStatus.PASSED + assert "Documentation links are valid" in result.message + class TestCheckChangelogEntry: def test_fail_when_documentation_file_path_does_not_exists(self, mocker, tmp_path): diff --git a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_testing.py 
b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_testing.py index d2b62d1aa4cd3..c6c997b03fd69 100644 --- a/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_testing.py +++ b/airbyte-ci/connectors/connectors_qa/tests/unit_tests/test_checks/test_testing.py @@ -110,7 +110,13 @@ def test_check_always_passes_when_usage_threshold_is_not_met(self, mocker, cases for usage_value in usage_values_to_test: for metadata_case in cases_to_test: # Arrange - connector = mocker.MagicMock(cloud_usage=usage_value, metadata=metadata_case, language=ConnectorLanguage.PYTHON, connector_type="source") + connector = mocker.MagicMock( + cloud_usage=usage_value, + metadata=metadata_case, + language=ConnectorLanguage.PYTHON, + connector_type="source", + ab_internal_sl=100 + ) # Act result = testing.AcceptanceTestsEnabledCheck().run(connector) diff --git a/docs/contributing-to-airbyte/resources/qa-checks.md b/docs/contributing-to-airbyte/resources/qa-checks.md index cb1c1b19f3a54..48ee995f01796 100644 --- a/docs/contributing-to-airbyte/resources/qa-checks.md +++ b/docs/contributing-to-airbyte/resources/qa-checks.md @@ -13,6 +13,7 @@ They are by no mean replacing the need for a manual review of the connector code _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ When a breaking change is introduced, we check that a migration guide is available. It should be stored under `./docs/integrations/s/-migrations.md`. 
@@ -23,15 +24,234 @@ This document should contain a section for each breaking change, in order of the _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ The user facing connector documentation should be stored under `./docs/integrations/s/.md`. +### Links used in connector documentation are valid + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +Invalid links in the user-facing connector documentation should be updated. For links that are used as examples and return a 404 status code, add `example: ` before the link to skip it. + +### Connectors documentation headers structure, naming and order follow our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). 
+ +This check expects the following order of headers in the documentation: + +```` + + + # CONNECTOR_NAME_FROM_METADATA + + ## Prerequisites + + ## Setup guide + + ## Set up CONNECTOR_NAME_FROM_METADATA + + ### For Airbyte Cloud: + + ### For Airbyte Open Source: + + ### CONNECTOR_SPECIFIC_FEATURES + + ## Set up the CONNECTOR_NAME_FROM_METADATA connector in Airbyte + + ### For Airbyte Cloud: + + ### For Airbyte Open Source: + + ## CONNECTOR_SPECIFIC_FEATURES + + ## Supported sync modes + + ## Supported Streams + + ## CONNECTOR_SPECIFIC_FEATURES + + ### Performance considerations + + ## Data type map + + ## Limitations & Troubleshooting + + ### CONNECTOR_SPECIFIC_FEATURES + + ### Tutorials + + ## Changelog + + +```` + + +List of optional headers, which may be absent from the documentation (strict checking is skipped for them): + + - Set up the CONNECTOR_NAME_FROM_METADATA connector in Airbyte + + - For Airbyte Cloud: (as subtitle of Set up CONNECTOR_NAME_FROM_METADATA) + + - For Airbyte Open Source: (as subtitle of Set up CONNECTOR_NAME_FROM_METADATA) + + - CONNECTOR_SPECIFIC_FEATURES (but these headers should be in the right place according to the expected order) + + - Performance considerations + + - Data type map + + - Limitations & Troubleshooting + + - Tutorials + + +### Prerequisites section of the documentation describes all required fields from specification + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The `Prerequisites` section of the user-facing connector documentation should describe all required fields from the source specification. Having all required fields described in one place helps Airbyte users easily set up the source connector. 
+If the spec has required credentials/access_token/refresh_token etc., the check searches for one of the ["account", "auth", "credentials", "access", "client"] words. There is no need to add credentials/access_token/refresh_token etc. to the section. + +### Main Source Section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that CONNECTOR_NAME_FROM_METADATA header section content follows standard template: + +```` + + + +This page contains the setup guide and reference information for the [CONNECTOR_NAME_FROM_METADATA]({docs_link}) source connector. + + + + +```` + +### 'For Airbyte Cloud:' section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that For Airbyte Cloud: header section content follows standard template: + +```` + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source. +3. On the Set up the source page, select CONNECTOR_NAME_FROM_METADATA from the Source type dropdown. +4. 
Enter a name for the CONNECTOR_NAME_FROM_METADATA connector. + +```` + +### 'For Airbyte Open Source:' section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that For Airbyte Open Source: header section content follows standard template: + +```` + +1. Navigate to the Airbyte Open Source dashboard. + +```` + +### 'Supported sync modes' section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). 
+ +Check verifies that Supported sync modes header section content follows standard template: + +```` + +The CONNECTOR_NAME_FROM_METADATA source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + + +```` + +### 'Tutorials' section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that Tutorials header section content follows standard template: + +```` + +Now that you have set up the CONNECTOR_NAME_FROM_METADATA source connector, check out the following CONNECTOR_NAME_FROM_METADATA tutorials: + + +```` + +### 'Changelog' section of the documentation follows our guidelines + +_Applies to the following connector types: source_ +_Applies to the following connector languages: python, low-code_ +_Applies to connector with any support level_ +_Applies to connector with 300 internal support level_ +_Applies to connector with any Airbyte usage level_ + +The user facing connector documentation should follow the guidelines defined in the [standard template](../../../airbyte-ci/connectors/connectors_qa/src/connectors_qa/checks/documentation/templates/template.md.j2). + +Check verifies that Changelog header section content follows standard template: + +```` +
<details> +  <summary>Expand to review</summary> +</details>
+```` + ### Connectors must have a changelog entry for each version _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Each new version of a connector must have a changelog entry defined in the user facing documentation in `./docs/integrations/s/.md`. @@ -43,6 +263,7 @@ Each new version of a connector must have a changelog entry defined in the user _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must have a `metadata.yaml` file at the root of their directory. This file is used to build our connector registry. Its structure must follow our metadata schema. Field values are also validated. This is to ensure that all connectors have the required metadata fields and that the metadata is valid. More details in this [documentation](https://docs.airbyte.com/connector-development/connector-metadata-file). @@ -52,6 +273,7 @@ Connectors must have a `metadata.yaml` file at the root of their directory. This _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must have a language tag in their metadata. It must be set in the `tags` field in metadata.yaml. The values can be `language:python` or `language:java`. 
This checks infers the correct language tag based on the presence of certain files in the connector directory. @@ -61,6 +283,7 @@ Connectors must have a language tag in their metadata. It must be set in the `ta _Applies to the following connector types: source, destination_ _Applies to the following connector languages: python, low-code_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Python connectors must have a CDK tag in their metadata. It must be set in the `tags` field in metadata.yaml. The values can be `cdk:low-code`, `cdk:python`, or `cdk:file`. @@ -70,6 +293,7 @@ Python connectors must have a CDK tag in their metadata. It must be set in the ` _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ If the connector version has a breaking change, the deadline field must be set to at least a week in the future. @@ -79,6 +303,7 @@ If the connector version has a breaking change, the deadline field must be set t _Applies to the following connector types: source_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with certified support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Certified source connectors must have a value filled out for `maxSecondsBetweenMessages` in metadata. This value represents the maximum number of seconds we could expect between messages for API connectors. And it's used by platform to tune connectors heartbeat timeout. The value must be set in the 'data' field in connector's `metadata.yaml` file. 
@@ -90,6 +315,7 @@ Certified source connectors must have a value filled out for `maxSecondsBetweenM _Applies to the following connector types: source, destination_ _Applies to the following connector languages: python, low-code_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must use [Poetry](https://python-poetry.org/) for dependency management. This is to ensure that all connectors use a dependency management tool which locks dependencies and ensures reproducible installs. @@ -99,6 +325,7 @@ Connectors must use [Poetry](https://python-poetry.org/) for dependency manageme _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must be licensed under the MIT or Elv2 license. This is to ensure that all connectors are licensed under a permissive license. More details in our [License FAQ](https://docs.airbyte.com/developer-guides/licenses/license-faq). @@ -108,6 +335,7 @@ Connectors must be licensed under the MIT or Elv2 license. This is to ensure tha _Applies to the following connector types: source, destination_ _Applies to the following connector languages: python, low-code_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors license in metadata.yaml and pyproject.toml file must match. This is to ensure that all connectors are consistently licensed. @@ -117,6 +345,7 @@ Connectors license in metadata.yaml and pyproject.toml file must match. 
This is _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connector version must follow the Semantic Versioning scheme. This is to ensure that all connectors follow a consistent versioning scheme. Refer to our [Semantic Versioning for Connectors](https://docs.airbyte.com/contributing-to-airbyte/#semantic-versioning-for-connectors) for more details. @@ -126,6 +355,7 @@ Connector version must follow the Semantic Versioning scheme. This is to ensure _Applies to the following connector types: source, destination_ _Applies to the following connector languages: python, low-code_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connector version in metadata.yaml and pyproject.toml file must match. This is to ensure that connector release is consistent. @@ -135,6 +365,7 @@ Connector version in metadata.yaml and pyproject.toml file must match. This is t _Applies to the following connector types: source_ _Applies to the following connector languages: python, low-code_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Python connectors must have [PyPi](https://pypi.org/) publishing enabled in their `metadata.yaml` file. This is declared by setting `remoteRegistries.pypi.enabled` to `true` in metadata.yaml. This is to ensure that all connectors can be published to PyPi and can be used in `PyAirbyte`. 
@@ -144,6 +375,7 @@ Python connectors must have [PyPi](https://pypi.org/) publishing enabled in thei _Applies to the following connector types: source, destination_ _Applies to the following connector languages: manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Manifest-only connectors must use `airbyte/source-declarative-manifest` as their base image. @@ -155,6 +387,7 @@ Manifest-only connectors must use `airbyte/source-declarative-manifest` as their _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Each connector must have an icon available in at the root of the connector code directory. It must be an SVG file named `icon.svg` and must be a square. @@ -166,6 +399,7 @@ Each connector must have an icon available in at the root of the connector code _Applies to the following connector types: source, destination_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must use HTTPS only when making requests to external services. @@ -175,6 +409,7 @@ Connectors must use HTTPS only when making requests to external services. 
_Applies to the following connector types: source, destination_ _Applies to the following connector languages: python, low-code, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with any Airbyte usage level_ Connectors must use our Python connector base image (`docker.io/airbyte/python-connector-base`), declared through the `connectorBuildOptions.baseImage` in their `metadata.yaml`. @@ -187,6 +422,7 @@ This is to ensure that all connectors use a base image which is maintained and h _Applies to the following connector types: source_ _Applies to the following connector languages: java, low-code, python, manifest-only_ _Applies to connector with any support level_ +_Applies to connector with any internal support level_ _Applies to connector with medium, high Airbyte usage level_ Medium to High Use Connectors must enable acceptance tests via the `connectorTestSuitesOptions.suite:acceptanceTests` in their respective metadata.yaml file to ensure that the connector is working as expected.