Skip to content

Commit

Permalink
Python base image: create airbyte user
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere committed Mar 28, 2024
1 parent a776f6c commit 13f5b18
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 8 deletions.
1 change: 1 addition & 0 deletions airbyte-ci/connectors/base_images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ RUN mkdir /usr/share/nltk_data
| Version | Published | Docker Image Address | Changelog |
|---------|-----------|--------------|-----------|
| 1.2.0 || docker.io/airbyte/python-connector-base:1.2.0@sha256:c22a9d97464b69d6ef01898edf3f8612dc11614f05a84984451dde195f337db9 | Add CDK system dependencies: nltk data, tesseract, poppler. |
| 1.2.0-rc.1 || docker.io/airbyte/python-connector-base:1.2.0-rc.1@sha256:280cec373cd5b7192d29de2967e2208f16621186a49d0b5c34bee4e2b2bd0bd3 | Create airbyte user |
| 1.1.0 || docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c | Install socat |
| 1.0.0 || docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 | Initial release: based on Python 3.9.18, on slim-bookworm system, with pip==23.2.1 and poetry==1.6.1 |

Expand Down
19 changes: 18 additions & 1 deletion airbyte-ci/connectors/base_images/base_images/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ class AirbyteConnectorBaseImage(ABC):
Please do not declare any Dagger with_exec instruction in this class as in the abstract class context we have no guarantee about the underlying system used in the base image.
"""

USER: str = "airbyte"
USER_ID: int = 1000
CACHE_DIR_PATH: str = "/custom_cache"
AIRBYTE_DIR_PATH: str = "/airbyte"

@final
def __init__(self, dagger_client: dagger.Client, version: semver.VersionInfo):
"""Initializes the Airbyte base image.
Expand Down Expand Up @@ -98,4 +103,16 @@ def get_base_container(self, platform: dagger.Platform) -> dagger.Container:
Returns:
dagger.Container: The container using the base python image.
"""
return self.dagger_client.pipeline(self.name_with_tag).container(platform=platform).from_(self.root_image.address)
return (
self.dagger_client.pipeline(self.name_with_tag)
.container(platform=platform)
.from_(self.root_image.address)
# Set the timezone to UTC
.with_exec(["ln", "-snf", "/usr/share/zoneinfo/Etc/UTC", "/etc/localtime"], skip_entrypoint=True)
# Install socat 1.7.4.4
.with_exec(["sh", "-c", "apt update && apt-get install -y socat=1.7.4.4-2"], skip_entrypoint=True)
.with_exec(["adduser", "--uid", str(self.USER_ID), "--system", "--group", "--no-create-home", self.USER], skip_entrypoint=True)
.with_exec(["mkdir", "--mode", "755", self.CACHE_DIR_PATH], skip_entrypoint=True)
.with_exec(["mkdir", "--mode", "755", self.AIRBYTE_DIR_PATH], skip_entrypoint=True)
.with_exec(["chown", f"{self.USER}:{self.USER}", self.AIRBYTE_DIR_PATH], skip_entrypoint=True)
)
4 changes: 3 additions & 1 deletion airbyte-ci/connectors/base_images/base_images/hacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def get_container_dockerfile(container) -> str:
"""

lineage = [
field for field in list(container._ctx.selections) if isinstance(field, dagger.api.base.Field) and field.type_name == "Container"
field
for field in list(container._ctx.selections)
if isinstance(field, dagger.client._core.Field) and field.type_name == "Container"
]
dockerfile = []

Expand Down
19 changes: 13 additions & 6 deletions airbyte-ci/connectors/base_images/base_images/python/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class AirbytePythonConnectorBaseImage(bases.AirbyteConnectorBaseImage):
},
}

@property
def pip_cache_path(self) -> str:
    """Return the pip cache location: the ``pip`` sub-path of ``CACHE_DIR_PATH``."""
    cache_root = self.CACHE_DIR_PATH
    return "{}/pip".format(cache_root)

def install_cdk_system_dependencies(self) -> Callable:
def get_nltk_data_dir() -> dagger.Directory:
"""Returns a dagger directory containing the nltk data.
Expand Down Expand Up @@ -67,7 +71,7 @@ def with_file_based_connector_dependencies(container: dagger.Container) -> dagge
- nltk data
"""
container = with_tesseract_and_poppler(container)
container = container.with_exec(["mkdir", self.nltk_data_path], skip_entrypoint=True).with_directory(
# `-p` takes no argument: a bare "755" would be created as an extra directory instead of being applied as
# the mode, so the permissions are passed explicitly through `--mode`.
container = container.with_exec(
    ["mkdir", "-p", "--mode", "755", self.nltk_data_path], skip_entrypoint=True
).with_directory(self.nltk_data_path, get_nltk_data_dir())
return container
Expand All @@ -90,18 +94,15 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container:

return (
self.get_base_container(platform)
.with_mounted_cache("/root/.cache/pip", pip_cache_volume)
# Set the timezone to UTC
.with_exec(["ln", "-snf", "/usr/share/zoneinfo/Etc/UTC", "/etc/localtime"])
.with_mounted_cache(self.pip_cache_path, pip_cache_volume, owner=self.USER)
.with_env_variable("PIP_CACHE_DIR", self.pip_cache_path)
# Upgrade pip to the expected version
.with_exec(["pip", "install", "--upgrade", "pip==23.2.1"])
# Declare poetry specific environment variables
.with_env_variable("POETRY_VIRTUALENVS_CREATE", "false")
.with_env_variable("POETRY_VIRTUALENVS_IN_PROJECT", "false")
.with_env_variable("POETRY_NO_INTERACTION", "1")
.with_exec(["pip", "install", "poetry==1.6.1"], skip_entrypoint=True)
# Install socat 1.7.4.4
.with_exec(["sh", "-c", "apt update && apt-get install -y socat=1.7.4.4-2"])
# Install CDK system dependencies
.with_(self.install_cdk_system_dependencies())
)
Expand All @@ -117,6 +118,12 @@ async def run_sanity_checks(self, platform: dagger.Platform):
container = self.get_container(platform)
await base_sanity_checks.check_timezone_is_utc(container)
await base_sanity_checks.check_a_command_is_available_using_version_option(container, "bash")
await base_sanity_checks.check_user_exists(container, self.USER, expected_uid=self.USER_ID, expected_gid=self.USER_ID)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.nltk_data_path)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.CACHE_DIR_PATH)
await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_cant_write_dir(container, self.USER, self.CACHE_DIR_PATH)
await python_sanity_checks.check_python_version(container, "3.9.18")
await python_sanity_checks.check_pip_version(container, "23.2.1")
await python_sanity_checks.check_poetry_version(container, "1.6.1")
Expand Down
81 changes: 81 additions & 0 deletions airbyte-ci/connectors/base_images/base_images/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,84 @@ async def check_socat_version(container: dagger.Container, expected_socat_versio
raise errors.SanityCheckError(f"unexpected socat version: {version_number}")
else:
raise errors.SanityCheckError(f"Could not find the socat version in the version output: {socat_version_line}")


async def check_user_exists(container: dagger.Container, user: str, expected_uid: int, expected_gid: int):
    """Check that a user exists in the container, can be impersonated and has the expected user id and group id.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        expected_uid (int): The expected user id.
        expected_gid (int): The expected group id.

    Raises:
        errors.SanityCheckError: Raised if the whoami or id commands could not be executed, if the user does not exist,
            or if its user id or group id is not the expected one.
    """
    container = container.with_user(user)

    async def run_as_user(command: list[str]) -> str:
        """Run a command as the impersonated user and return its stripped standard output."""
        try:
            return (await container.with_exec(command, skip_entrypoint=True).stdout()).strip()
        except dagger.ExecError as e:
            # Every command of this check reports failures as a sanity check error, not only whoami
            raise errors.SanityCheckError(e) from e

    if await run_as_user(["whoami"]) != user:
        raise errors.SanityCheckError(f"The user {user} does not exist in the container.")

    for id_flag, id_kind, expected_id in (("-u", "user", expected_uid), ("-g", "group", expected_gid)):
        actual_id = await run_as_user(["id", id_flag])
        try:
            is_expected_id = int(actual_id) == expected_id
        except ValueError:
            # A non numeric output can never match the expected id
            is_expected_id = False
        if not is_expected_id:
            raise errors.SanityCheckError(f"Unexpected {id_kind} id: {actual_id}")


async def check_user_can_read_dir(container: dagger.Container, user: str, dir_path: str):
    """Check that the given user has read permissions on files in a given directory.

    A probe file is first created in the directory with the user the container is currently configured with,
    then it is read back while impersonating the given user.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the given user could not read a file created in the given directory.
    """
    probe_file_path = f"{dir_path}/foo.txt"
    try:
        await (
            container.with_exec(["touch", probe_file_path], skip_entrypoint=True)
            .with_user(user)
            .with_exec(["cat", probe_file_path], skip_entrypoint=True)
        )
    except dagger.ExecError as e:
        # Keep the underlying execution error attached as the explicit cause
        raise errors.SanityCheckError(f"{dir_path} is not readable by the {user}.") from e


async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_path: str):
    """Check that the given user is denied write access to a given directory.

    The check tries to create a file in the directory while impersonating the user: a failing command is the
    expected outcome.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the user could write a file in the given directory.
    """
    try:
        impersonated_container = container.with_user(user)
        touch_command = ["touch", f"{dir_path}/foo.txt"]
        await impersonated_container.with_exec(touch_command, skip_entrypoint=True)
    except dagger.ExecError:
        # The write attempt was rejected: this is what the check expects
        return
    else:
        raise errors.SanityCheckError(f"{dir_path} is writable by the {user}.")


async def check_user_can_write_dir(container: dagger.Container, user: str, dir_path: str):
    """Check that the given user has write permissions on files in a given directory.

    The check creates a file in the directory while impersonating the user.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the user could not write a file in the given directory.
    """
    try:
        await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"], skip_entrypoint=True)
    except dagger.ExecError as e:
        # Keep the underlying execution error attached as the explicit cause
        raise errors.SanityCheckError(f"{dir_path} is not writable by the {user}.") from e
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
"changelog_entry": "Add CDK system dependencies: nltk data, tesseract, poppler.",
"dockerfile_example": "FROM docker.io/python:3.9.18-slim-bookworm@sha256:44b7f161ed03f85e96d423b9916cdc8cb0509fb970fd643bdbc9896d49e1cad0\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN pip install --upgrade pip==23.2.1\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.6.1\nRUN sh -c apt update && apt-get install -y socat=1.7.4.4-2\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir /usr/share/nltk_data"
},
{
"version": "1.2.0-rc.1",
"changelog_entry": "Create airbyte user",
"dockerfile_example": "FROM docker.io/python:3.9.18-slim-bookworm@sha256:44b7f161ed03f85e96d423b9916cdc8cb0509fb970fd643bdbc9896d49e1cad0\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN sh -c apt update && apt-get install -y socat=1.7.4.4-2\nRUN adduser --system --group --no-create-home airbyte\nRUN mkdir --mode 755 /custom_cache\nRUN mkdir --mode 755 /airbyte\nRUN chown airbyte:airbyte /airbyte\nENV PIP_CACHE_DIR=/custom_cache/pip\nRUN pip install --upgrade pip==23.2.1\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.6.1\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir -p 755 /usr/share/nltk_data"
},
{
"version": "1.2.0-rc.1",
"changelog_entry": "Create airbyte user",
"dockerfile_example": "FROM docker.io/python:3.9.18-slim-bookworm@sha256:44b7f161ed03f85e96d423b9916cdc8cb0509fb970fd643bdbc9896d49e1cad0\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN sh -c apt update && apt-get install -y socat=1.7.4.4-2\nRUN adduser --uid 1000 --system --group --no-create-home airbyte\nRUN mkdir --mode 755 /custom_cache\nRUN mkdir --mode 755 /airbyte\nRUN chown airbyte:airbyte /airbyte\nENV PIP_CACHE_DIR=/custom_cache/pip\nRUN pip install --upgrade pip==23.2.1\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.6.1\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir -p 755 /usr/share/nltk_data"
},
{
"version": "1.1.0",
"changelog_entry": "Install socat",
Expand Down

0 comments on commit 13f5b18

Please sign in to comment.