
airbyte-ci: refactor gradle containers, use amazoncorretto #30384

Merged (15 commits) on Sep 13, 2023
1 change: 1 addition & 0 deletions airbyte-ci/connectors/pipelines/README.md
@@ -406,6 +406,7 @@ This command runs the Python tests for an airbyte-ci poetry package.
## Changelog
| Version | PR | Description |
|---------| --------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------|
| 1.2.1 | [#30384](https://github.com/airbytehq/airbyte/pull/30384) | Java connector test performance fixes. |
| 1.2.0 | [#30330](https://github.com/airbytehq/airbyte/pull/30330) | Add `--metadata-query` option to connectors command |
| 1.1.3 | [#30314](https://github.com/airbytehq/airbyte/pull/30314) | Stop patching gradle files to make them work with airbyte-ci. |
| 1.1.2 | [#30279](https://github.com/airbytehq/airbyte/pull/30279) | Fix correctness issues in layer caching by making atomic execution groupings |
148 changes: 58 additions & 90 deletions airbyte-ci/connectors/pipelines/pipelines/actions/environments.py
@@ -14,13 +14,17 @@
from typing import TYPE_CHECKING, Callable, List, Optional

import toml
from dagger import CacheVolume, Client, Container, DaggerError, Directory, File, Platform, Secret
from dagger import CacheSharingMode, CacheVolume, Client, Container, DaggerError, Directory, File, Platform, Secret
from dagger.engine._version import CLI_VERSION as dagger_engine_version
from pipelines import consts
from pipelines.consts import (
AMAZONCORRETTO_IMAGE,
CI_CREDENTIALS_SOURCE_PATH,
CONNECTOR_OPS_SOURCE_PATHSOURCE_PATH,
CONNECTOR_TESTING_REQUIREMENTS,
DOCKER_HOST_NAME,
DOCKER_HOST_PORT,
DOCKER_TMP_VOLUME_NAME,
LICENSE_SHORT_FILE_PATH,
PYPROJECT_TOML_FILE_PATH,
)
@@ -470,14 +474,34 @@ def with_global_dockerd_service(dagger_client: Client) -> Container:
Container: The container running dockerd as a service
"""
return (
dagger_client.container()
.from_(consts.DOCKER_DIND_IMAGE)
.with_mounted_cache(
"/tmp",
dagger_client.cache_volume("shared-tmp"),
dagger_client.container().from_(consts.DOCKER_DIND_IMAGE)
# We set this env var because we need to use a non-default zombie reaper setting.
# The reason for this is that by default it will want to set its parent process ID to 1 when reaping.
# This won't be possible because of container-ception: dind is running inside the dagger engine.
# See https://github.com/krallin/tini#subreaping for details.
.with_env_variable("TINI_SUBREAPER", "")
# Similarly, because of container-ception, we have to use the fuse-overlayfs storage engine.
.with_exec(
sh_dash_c(
[
# Update package metadata.
"apk update",
# Install the storage driver package.
"apk add fuse-overlayfs",
# Update daemon config with storage driver.
"mkdir /etc/docker",
'(echo {\\"storage-driver\\": \\"fuse-overlayfs\\"} > /etc/docker/daemon.json)',
]
)
)
# Expose the docker host port.
.with_exposed_port(DOCKER_HOST_PORT)
# Mount the docker cache volumes.
.with_mounted_cache("/tmp", dagger_client.cache_volume(DOCKER_TMP_VOLUME_NAME))
# Run the docker daemon and bind it to the exposed TCP port.
.with_exec(
["dockerd", "--log-level=error", f"--host=tcp://0.0.0.0:{DOCKER_HOST_PORT}", "--tls=false"], insecure_root_capabilities=True
)
.with_exposed_port(2375)
.with_exec(["dockerd", "--log-level=error", "--host=tcp://0.0.0.0:2375", "--tls=false"], insecure_root_capabilities=True)
)
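
To make the service wiring concrete, here is a minimal sketch (not part of this diff) of a client container consuming the dockerd service over the exposed TCP port, mirroring what `with_bound_docker_host` and `with_docker_cli` do below. The helper name `check_docker_host` is hypothetical; the Dagger calls are the same ones this module already uses:

```python
import dagger
from pipelines import consts
from pipelines.actions import environments


async def check_docker_host(dagger_client: dagger.Client) -> str:
    """Hypothetical smoke test: bind a docker CLI container to the shared
    dockerd service and query the daemon."""
    dockerd = environments.with_global_dockerd_service(dagger_client)
    return await (
        dagger_client.container()
        .from_(consts.DOCKER_CLI_IMAGE)
        # Point the docker CLI at the service through its network alias.
        .with_env_variable("DOCKER_HOST", f"tcp://{consts.DOCKER_HOST_NAME}:{consts.DOCKER_HOST_PORT}")
        .with_service_binding(consts.DOCKER_HOST_NAME, dockerd)
        .with_exec(["docker", "info"])
        .stdout()
    )
```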


@@ -493,16 +517,14 @@ def with_bound_docker_host(
Returns:
Container: The container bound to the docker host.
"""
dockerd = context.dockerd_service
docker_hostname = "global-docker-host"
return (
container.with_env_variable("DOCKER_HOST", f"tcp://{docker_hostname}:2375")
.with_service_binding(docker_hostname, dockerd)
.with_mounted_cache("/tmp", context.dagger_client.cache_volume("shared-tmp"))
container.with_env_variable("DOCKER_HOST", f"tcp://{DOCKER_HOST_NAME}:{DOCKER_HOST_PORT}")
.with_service_binding(DOCKER_HOST_NAME, context.dockerd_service)
.with_mounted_cache("/tmp", context.dagger_client.cache_volume(DOCKER_TMP_VOLUME_NAME))
)


def bound_docker_host(context: ConnectorContext) -> Container:
def bound_docker_host(context: ConnectorContext) -> Callable[[Container], Container]:
def bound_docker_host_inner(container: Container) -> Container:
return with_bound_docker_host(context, container)
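
The switch to returning `Callable[[Container], Container]` suggests this helper is meant for Dagger's `with_` combinator, which applies a callable inside a fluent chain. A hedged sketch of that usage, assuming the Python SDK exposes `Container.with_` like the other Dagger SDKs (the image and function name here are illustrative only):

```python
from dagger import Container
from pipelines.actions import environments


def with_docker_enabled_java(context: "ConnectorContext") -> Container:
    """Hypothetical example: thread the docker-host binding into a chain."""
    return (
        context.dagger_client.container()
        .from_("amazoncorretto:17.0.8-al2023")  # arbitrary example base image
        # Equivalent to with_bound_docker_host(context, <container so far>).
        .with_(environments.bound_docker_host(context))
        .with_exec(["docker", "version"])
    )
```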

@@ -522,78 +544,6 @@ def with_docker_cli(context: ConnectorContext) -> Container:
return with_bound_docker_host(context, docker_cli)


def with_gradle(
context: ConnectorContext,
sources_to_include: List[str] = None,
bind_to_docker_host: bool = True,
) -> Container:
"""Create a container with Gradle installed and bound to a persistent docker host.

Args:
context (ConnectorContext): The current connector context.
sources_to_include (List[str], optional): List of additional source path to mount to the container. Defaults to None.
bind_to_docker_host (bool): Whether to bind the gradle container to a docker host.

Returns:
Container: A container with Gradle installed and Java sources from the repository.
"""

include = [
".root",
".env",
"build.gradle",
"deps.toml",
"gradle.properties",
"gradle",
"gradlew",
"LICENSE_SHORT",
"settings.gradle",
"build.gradle",
"tools/gradle",
"spotbugs-exclude-filter-file.xml",
"buildSrc",
"tools/bin/build_image.sh",
"tools/lib/lib.sh",
"tools/gradle/codestyle",
"pyproject.toml",
]

if sources_to_include:
include += sources_to_include
# TODO re-enable once we have fixed the over caching issue
# gradle_dependency_cache: CacheVolume = context.dagger_client.cache_volume("gradle-dependencies-caching")
# gradle_build_cache: CacheVolume = context.dagger_client.cache_volume(f"{context.connector.technical_name}-gradle-build-cache")

openjdk_with_docker = (
context.dagger_client.container()
.from_("openjdk:17.0.1-jdk-slim")
.with_exec(
sh_dash_c(
[
"apt-get update",
"apt-get install -y curl jq rsync npm pip",
]
)
)
.with_env_variable("VERSION", consts.DOCKER_VERSION)
.with_exec(sh_dash_c(["curl -fsSL https://get.docker.com | sh"]))
.with_env_variable("GRADLE_HOME", "/root/.gradle")
.with_exec(["mkdir", "/airbyte"])
.with_workdir("/airbyte")
.with_mounted_directory("/airbyte", context.get_repo_dir(".", include=include))
.with_exec(["mkdir", "-p", consts.GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH])
# TODO (ben) reenable once we have fixed the over caching issue
# .with_mounted_cache(consts.GRADLE_BUILD_CACHE_PATH, gradle_build_cache, sharing=CacheSharingMode.LOCKED)
# .with_mounted_cache(consts.GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH, gradle_dependency_cache)
.with_env_variable("GRADLE_RO_DEP_CACHE", consts.GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH)
)

if bind_to_docker_host:
return with_bound_docker_host(context, openjdk_with_docker)
else:
return openjdk_with_docker


async def load_image_to_docker_host(context: ConnectorContext, tar_file: File, image_tag: str):
"""Load a docker image tar archive to the docker host.

@@ -678,30 +628,47 @@ def with_integration_base(context: PipelineContext, build_platform: Platform) ->
)


def with_integration_base_java(context: PipelineContext, build_platform: Platform, jdk_version: str = "17.0.4") -> Container:
def with_integration_base_java(context: PipelineContext, build_platform: Platform) -> Container:
integration_base = with_integration_base(context, build_platform)
yum_packages_to_install = [
"tar", # required to untar java connector binary distributions.
"openssl", # required because we need to ssh and scp sometimes.
"findutils", # required for xargs, which is shipped as part of findutils.
]
return (
context.dagger_client.container(platform=build_platform)
.from_(f"amazoncorretto:{jdk_version}")
.with_directory("/airbyte", integration_base.directory("/airbyte"))
# Use a linux+jdk base image with long-term support, such as amazoncorretto.
.from_(AMAZONCORRETTO_IMAGE)
# Install a bunch of packages as early as possible.
.with_exec(
sh_dash_c(
[
# Update first, but in the same .with_exec step as the package installation.
# Otherwise, we risk caching stale package URLs.
"yum update -y",
"yum install -y tar openssl",
#
f"yum install -y {' '.join(yum_packages_to_install)}",
# Remove any dangly bits.
"yum clean all",
]
)
)
# Add what files we need to the /airbyte directory.
# Copy base.sh from the airbyte/integration-base image.
.with_directory("/airbyte", integration_base.directory("/airbyte"))
.with_workdir("/airbyte")
# Download a utility jar from the internet.
.with_file("dd-java-agent.jar", context.dagger_client.http("https://dtdg.co/latest-java-tracer"))
# Copy javabase.sh from the git repo.
.with_file("javabase.sh", context.get_repo_dir("airbyte-integrations/bases/base-java", include=["javabase.sh"]).file("javabase.sh"))
# Set a bunch of env variables used by base.sh.
.with_env_variable("AIRBYTE_SPEC_CMD", "/airbyte/javabase.sh --spec")
.with_env_variable("AIRBYTE_CHECK_CMD", "/airbyte/javabase.sh --check")
.with_env_variable("AIRBYTE_DISCOVER_CMD", "/airbyte/javabase.sh --discover")
.with_env_variable("AIRBYTE_READ_CMD", "/airbyte/javabase.sh --read")
.with_env_variable("AIRBYTE_WRITE_CMD", "/airbyte/javabase.sh --write")
.with_env_variable("AIRBYTE_ENTRYPOINT", "/airbyte/base.sh")
# Set image labels.
.with_label("io.airbyte.version", "0.1.2")
.with_label("io.airbyte.name", "airbyte/integration-base-java")
)
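
For orientation (not part of this diff): the `AIRBYTE_*_CMD` variables set above are consumed by `base.sh`, which is how the platform invokes every Java connector sub-command uniformly. Below is a hedged sketch of exercising that entrypoint; note the bare base image carries no connector jar, so this would only succeed on a full connector image built on top of it:

```python
from dagger import Platform
from pipelines.actions import environments


async def run_spec_via_base_sh(context) -> str:
    """Hypothetical illustration: base.sh dispatches `spec` to AIRBYTE_SPEC_CMD,
    i.e. `/airbyte/javabase.sh --spec`, as wired up in the diff above."""
    connector_image = environments.with_integration_base_java(context, Platform("linux/amd64"))
    # On a real connector image (base + connector tar), this prints the spec JSON.
    return await connector_image.with_exec(["/airbyte/base.sh", "spec"]).stdout()
```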
Expand Down Expand Up @@ -808,6 +775,7 @@ def with_integration_base_java_and_normalization(context: PipelineContext, build
.with_exec(
sh_dash_c(
[
"yum update -y",
f"yum install -y {' '.join(yum_packages_to_install)}",
"yum clean all",
"alternatives --install /usr/bin/python python /usr/bin/python3 60",
@@ -11,36 +11,42 @@


class BuildConnectorDistributionTar(GradleTask):
"""
A step to build a Java connector image using the distTar Gradle task.
"""

title = "Build connector tar"
gradle_task_name = "distTar"


async def _run(self) -> StepResult:
cdk_includes = ["./airbyte-cdk/java/airbyte-cdk/**"]
with_built_tar = (
environments.with_gradle(
self.context,
self.build_include + cdk_includes,
)
.with_exec(["./gradlew", ":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"])
.with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir())
.with_exec(self._get_gradle_command())
.with_workdir(f"{self.context.connector.code_directory}/build/distributions")
)
result = await super()._run()
if result.status is not StepStatus.SUCCESS:
return result

with_built_tar = result.output_artifact.with_workdir(f"{self.context.connector.code_directory}/build/distributions")
distributions = await with_built_tar.directory(".").entries()
tar_files = [f for f in distributions if f.endswith(".tar")]
await self._export_gradle_dependency_cache(with_built_tar)
if len(tar_files) == 1:
return StepResult(
self,
StepStatus.SUCCESS,
stdout="The tar file for the current connector was successfully built.",
stdout="The distribution tar file for the current java connector was built.",
output_artifact=with_built_tar.file(tar_files[0]),
)
elif len(tar_files) == 0:
return StepResult(
self,
StepStatus.FAILURE,
stderr="The distribution tar file for the current java connector was not built.",
)
else:
return StepResult(
self,
StepStatus.FAILURE,
stderr="The distributions directory contains multiple connector tar files. We can't infer which one should be used. Please review and delete any unnecessary tar files.",
stderr="The distributions directory for this java connector contains multiple tar files. "
"We can't infer which one should be used. "
"Please review and delete any unnecessary tar files.",
)


8 changes: 6 additions & 2 deletions airbyte-ci/connectors/pipelines/pipelines/consts.py
@@ -26,11 +26,15 @@
CONNECTOR_OPS_SOURCE_PATHSOURCE_PATH = "airbyte-ci/connectors/connector_ops"
BUILD_PLATFORMS = [Platform("linux/amd64"), Platform("linux/arm64")]
LOCAL_BUILD_PLATFORM = Platform(f"linux/{platform.machine()}")
AMAZONCORRETTO_IMAGE = "amazoncorretto:17.0.8-al2023"
DOCKER_VERSION = "24.0.2"
DOCKER_DIND_IMAGE = "docker:24-dind"
DOCKER_CLI_IMAGE = "docker:24-cli"
DOCKER_DIND_IMAGE = f"docker:{DOCKER_VERSION}-dind"
DOCKER_CLI_IMAGE = f"docker:{DOCKER_VERSION}-cli"
GRADLE_CACHE_PATH = "/root/.gradle/caches"
GRADLE_BUILD_CACHE_PATH = f"{GRADLE_CACHE_PATH}/build-cache-1"
GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH = "/root/gradle_dependency_cache"
LOCAL_REPORTS_PATH_ROOT = "airbyte-ci/connectors/pipelines/pipeline_reports/"
GCS_PUBLIC_DOMAIN = "https://storage.cloud.google.com"
DOCKER_HOST_NAME = "global-docker-host"
DOCKER_HOST_PORT = 2375
DOCKER_TMP_VOLUME_NAME = "shared-tmp"
1 change: 1 addition & 0 deletions airbyte-ci/connectors/pipelines/pipelines/contexts.py
@@ -53,6 +53,7 @@ class PipelineContext:
+ glob("**/.mypy_cache", recursive=True)
+ glob("**/.DS_Store", recursive=True)
+ glob("**/airbyte_ci_logs", recursive=True)
+ glob("**/.gradle", recursive=True)
)

def __init__(
@@ -15,18 +15,14 @@ class FormatConnectorCode(GradleTask):
"""

title = "Format connector code"
gradle_task_name = "format"

async def _run(self) -> StepResult:
formatted = (
environments.with_gradle(self.context, self.build_include, bind_to_docker_host=self.BIND_TO_DOCKER_HOST)
.with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir())
.with_exec(["./gradlew", "format"])
)
exit_code, stdout, stderr = await get_exec_result(formatted)
result = await super()._run()
return StepResult(
self,
self.get_step_status_from_exit_code(exit_code),
stderr=stderr,
stdout=stdout,
output_artifact=formatted.directory(str(self.context.connector.code_directory)),
result.status,
stderr=result.stderr,
stdout=result.stdout,
output_artifact=result.output_artifact.directory(str(self.context.connector.code_directory)),
)