Skip to content

Commit

Permalink
Python base image: create airbyte user
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere committed Nov 20, 2024
1 parent e610db7 commit c1d53e3
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 10 deletions.
4 changes: 4 additions & 0 deletions airbyte-ci/connectors/base_images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ RUN mkdir /usr/share/nltk_data
| 2.0.0 || docker.io/airbyte/python-connector-base:2.0.0@sha256:c44839ba84406116e8ba68722a0f30e8f6e7056c726f447681bb9e9ece8bd916 | Use Python 3.10 |
| 1.2.3 || docker.io/airbyte/python-connector-base:1.2.3@sha256:a8abfdc75f8e22931657a1ae15069e7b925e74bb7b5ef36371a85e4caeae5696 | Use latest root image version and update system packages |
| 1.2.2 || docker.io/airbyte/python-connector-base:1.2.2@sha256:57703de3b4c4204bd68a7b13c9300f8e03c0189bffddaffc796f1da25d2dbea0 | Fix Python 3.9.19 image digest |
| 1.2.2-rc.1 || docker.io/airbyte/python-connector-base:1.2.2-rc.1@sha256:9715c1ba59b69783eb87758eb0c2fac8d3904233d6ffd687c5642af97eed0c7a | Create an airbyte user and use it |
| 1.2.1 || docker.io/airbyte/python-connector-base:1.2.1@sha256:4a4255e2bccab71fa5912487e42d9755cdecffae77273fed8be01a081cd6e795 | Upgrade to Python 3.9.19 + update pip and setuptools |
| 1.2.0 || docker.io/airbyte/python-connector-base:1.2.0@sha256:c22a9d97464b69d6ef01898edf3f8612dc11614f05a84984451dde195f337db9 | Add CDK system dependencies: nltk data, tesseract, poppler. |
| 1.2.0-rc.1 || docker.io/airbyte/python-connector-base:1.2.0-rc.1@sha256:f6467768b75fb09125f6e6b892b6b48c98d9fe085125f3ff4adc722afb1e5b30 | |
Expand Down
25 changes: 23 additions & 2 deletions airbyte-ci/connectors/base_images/base_images/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ class AirbyteConnectorBaseImage(ABC):
Please do not declare any Dagger with_exec instruction in this class as in the abstract class context we have no guarantee about the underlying system used in the base image.
"""

USER: str = "airbyte"
USER_ID: int = 1000
CACHE_DIR_PATH: str = "/custom_cache"
AIRBYTE_DIR_PATH: str = "/airbyte"

@final
def __init__(self, dagger_client: dagger.Client, version: semver.VersionInfo):
"""Initializes the Airbyte base image.
Expand Down Expand Up @@ -94,9 +99,25 @@ async def run_sanity_checks(self, platform: dagger.Platform):
# INSTANCE METHODS:
@final
def get_base_container(self, platform: dagger.Platform) -> dagger.Container:
"""Returns a container using the base image. This container is used to build the Airbyte base image.
"""Returns a container using the base image.
This container is used to build the Airbyte base image.
We create the user 'airbyte' with the UID 1000 and GID 1000.
Returns:
dagger.Container: The container using the base python image.
"""
return self.dagger_client.container(platform=platform).from_(self.root_image.address)
return (
self.dagger_client.container(platform=platform)
.from_(self.root_image.address)
# Set the timezone to UTC
.with_exec(["ln", "-snf", "/usr/share/zoneinfo/Etc/UTC", "/etc/localtime"])
# Create the user 'airbyte' with the UID 1000 and GID 1000
.with_exec(["adduser", "--uid", str(self.USER_ID), "--system", "--group", "--no-create-home", self.USER])
# Create the cache airbyte directories and set the right permissions
.with_exec(["mkdir", "--mode", "755", self.CACHE_DIR_PATH])
.with_exec(["mkdir", "--mode", "755", self.AIRBYTE_DIR_PATH])
# Change the owner of the airbyte directory to the user 'airbyte'
.with_exec(["chown", f"{self.USER}:{self.USER}", self.AIRBYTE_DIR_PATH])
# Change the owner of the cache directory to the user 'airbyte'
.with_exec(["chown", f"{self.USER}:{self.USER}", self.CACHE_DIR_PATH])
)
27 changes: 19 additions & 8 deletions airbyte-ci/connectors/base_images/base_images/python/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class AirbytePythonConnectorBaseImage(bases.AirbyteConnectorBaseImage):
},
}

@property
def pip_cache_path(self) -> str:
return f"{self.CACHE_DIR_PATH}/pip"

def install_cdk_system_dependencies(self) -> Callable:
def get_nltk_data_dir() -> dagger.Directory:
"""Returns a dagger directory containing the nltk data.
Expand Down Expand Up @@ -71,7 +75,9 @@ def with_file_based_connector_dependencies(container: dagger.Container) -> dagge
- nltk data
"""
container = with_tesseract_and_poppler(container)
container = container.with_exec(["mkdir", self.nltk_data_path]).with_directory(self.nltk_data_path, get_nltk_data_dir())
container = container.with_exec(["mkdir", "-p", "755", self.nltk_data_path]).with_directory(
self.nltk_data_path, get_nltk_data_dir()
)
return container

return with_file_based_connector_dependencies
Expand All @@ -92,18 +98,17 @@ def get_container(self, platform: dagger.Platform) -> dagger.Container:

return (
self.get_base_container(platform)
.with_mounted_cache("/root/.cache/pip", pip_cache_volume)
.with_exec(["ln", "-snf", "/usr/share/zoneinfo/Etc/UTC", "/etc/localtime"], use_entrypoint=True)
.with_exec(["pip", "install", "--upgrade", "pip==24.0", "setuptools==70.0.0"], use_entrypoint=True)
.with_mounted_cache(self.pip_cache_path, pip_cache_volume, owner=self.USER)
.with_env_variable("PIP_CACHE_DIR", self.pip_cache_path)
# Upgrade pip to the expected version
.with_exec(["pip", "install", "--upgrade", "pip==24.0", "setuptools==70.0.0"])
# Declare poetry specific environment variables
.with_env_variable("POETRY_VIRTUALENVS_CREATE", "false")
.with_env_variable("POETRY_VIRTUALENVS_IN_PROJECT", "false")
.with_env_variable("POETRY_NO_INTERACTION", "1")
.with_exec(["pip", "install", "poetry==1.6.1"])
.with_exec(
["sh", "-c", "apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y && apt-get clean"], use_entrypoint=True
)
.with_exec(["sh", "-c", "apt-get install -y socat=1.7.4.4-2"], use_entrypoint=True)
.with_exec(["sh", "-c", "apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y && apt-get clean"])
.with_exec(["sh", "-c", "apt-get install -y socat=1.7.4.4-2"])
# Install CDK system dependencies
.with_(self.install_cdk_system_dependencies())
)
Expand All @@ -121,6 +126,12 @@ async def run_sanity_checks(self, platform: dagger.Platform):
await base_sanity_checks.check_a_command_is_available_using_version_option(container, "bash")
await python_sanity_checks.check_python_version(container, "3.10.14")
await python_sanity_checks.check_pip_version(container, "24.0")
await base_sanity_checks.check_user_exists(container, self.USER, expected_uid=self.USER_ID, expected_gid=self.USER_ID)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.nltk_data_path)
await base_sanity_checks.check_user_can_read_dir(container, self.USER, self.CACHE_DIR_PATH)
await base_sanity_checks.check_user_can_write_dir(container, self.USER, self.AIRBYTE_DIR_PATH)
await base_sanity_checks.check_user_cant_write_dir(container, self.USER, self.CACHE_DIR_PATH)
await python_sanity_checks.check_poetry_version(container, "1.6.1")
await python_sanity_checks.check_python_image_has_expected_env_vars(container)
await base_sanity_checks.check_a_command_is_available_using_version_option(container, "socat", "-V")
Expand Down
81 changes: 81 additions & 0 deletions airbyte-ci/connectors/base_images/base_images/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,84 @@ async def check_socat_version(container: dagger.Container, expected_socat_versio
raise errors.SanityCheckError(f"unexpected socat version: {version_number}")
else:
raise errors.SanityCheckError(f"Could not find the socat version in the version output: {socat_version_line}")


async def check_user_exists(container: dagger.Container, user: str, expected_uid: int, expected_gid: int) -> None:
    """Check that a user exists in the container, can be impersonated and has the expected user id and group id.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        expected_uid (int): The expected user id.
        expected_gid (int): The expected group id.

    Raises:
        errors.SanityCheckError: Raised if the whoami or id commands could not be executed,
            if the impersonated user is not the expected one, or if its user id / group id
            differ from the expected values.
    """
    impersonated = container.with_user(user)

    async def run_as_user(command: list) -> str:
        # Every command goes through the same wrapper so that a failing `id` is reported
        # as a SanityCheckError (as documented) instead of leaking a raw dagger.ExecError.
        try:
            return (await impersonated.with_exec(command, skip_entrypoint=True).stdout()).strip()
        except dagger.ExecError as e:
            # Chain the original error to keep the failing command output in the traceback.
            raise errors.SanityCheckError(e) from e

    whoami_output = await run_as_user(["whoami"])
    if whoami_output != user:
        raise errors.SanityCheckError(f"The user {user} does not exist in the container.")

    user_id = await run_as_user(["id", "-u"])
    if int(user_id) != expected_uid:
        raise errors.SanityCheckError(f"Unexpected user id: {user_id}")

    group_id = await run_as_user(["id", "-g"])
    if int(group_id) != expected_gid:
        raise errors.SanityCheckError(f"Unexpected group id: {group_id}")


async def check_user_can_read_dir(container: dagger.Container, user: str, dir_path: str) -> None:
    """Check that the given user has read permissions on files in a given directory.

    A probe file is created in the directory with the container's current user, then read
    back while impersonating the given user.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the given user could not read a file created in the given directory.
    """
    probe_file = f"{dir_path}/foo.txt"
    try:
        # NOTE(review): relies on awaiting a dagger.Container forcing the pipeline to execute
        # (Container.sync semantics) - confirm against the pinned dagger SDK version.
        await (
            container.with_exec(["touch", probe_file], skip_entrypoint=True)
            .with_user(user)
            .with_exec(["cat", probe_file], skip_entrypoint=True)
        )
    except dagger.ExecError as e:
        # Chain the original error so the failing command and its output stay visible.
        raise errors.SanityCheckError(f"{dir_path} is not readable by the {user}.") from e


async def check_user_cant_write_dir(container: dagger.Container, user: str, dir_path: str):
    """Ensure the given user is denied write access to a directory.

    The check impersonates the user and tries to create a file in the directory:
    a failing command is the expected outcome.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the user could write a file in the given directory.
    """
    probe_file = f"{dir_path}/foo.txt"
    try:
        impersonated = container.with_user(user)
        await impersonated.with_exec(["touch", probe_file], skip_entrypoint=True)
    except dagger.ExecError:
        # The write was rejected: this is what the check expects.
        write_succeeded = False
    else:
        write_succeeded = True

    if write_succeeded:
        raise errors.SanityCheckError(f"{dir_path} is writable by the {user}.")


async def check_user_can_write_dir(container: dagger.Container, user: str, dir_path: str) -> None:
    """Check that the given user has write permissions on files in a given directory.

    The check impersonates the user and creates a probe file in the directory.

    Args:
        container (dagger.Container): The container on which the sanity checks should run.
        user (str): The user to impersonate.
        dir_path (str): The directory path to check.

    Raises:
        errors.SanityCheckError: Raised if the user could not write a file in the given directory.
    """
    try:
        await container.with_user(user).with_exec(["touch", f"{dir_path}/foo.txt"], skip_entrypoint=True)
    except dagger.ExecError as e:
        # Chain the original error so the failing command and its output stay visible.
        raise errors.SanityCheckError(f"{dir_path} is not writable by the {user}.") from e
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
"changelog_entry": "Fix Python 3.9.19 image digest",
"dockerfile_example": "FROM docker.io/python:3.9.19-slim-bookworm@sha256:088d9217202188598aac37f8db0929345e124a82134ac66b8bb50ee9750b045b\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN pip install --upgrade pip==24.0 setuptools==70.0.0\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.6.1\nRUN sh -c apt update && apt-get install -y socat=1.7.4.4-2\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir /usr/share/nltk_data"
},
{
"version": "1.2.2-rc.1",
"changelog_entry": "Create an airbyte user and use it",
"dockerfile_example": "FROM docker.io/python:3.9.19-slim-bookworm@sha256:088d9217202188598aac37f8db0929345e124a82134ac66b8bb50ee9750b045b\nRUN ln -snf /usr/share/zoneinfo/Etc/UTC /etc/localtime\nRUN sh -c apt update && apt-get install -y socat=1.7.4.4-2\nRUN adduser --uid 1000 --system --group --no-create-home airbyte\nRUN mkdir --mode 755 /custom_cache\nRUN mkdir --mode 755 /airbyte\nRUN chown airbyte:airbyte /airbyte\nENV PIP_CACHE_DIR=/custom_cache/pip\nRUN pip install --upgrade pip==24.0 setuptools==70.0.0\nENV POETRY_VIRTUALENVS_CREATE=false\nENV POETRY_VIRTUALENVS_IN_PROJECT=false\nENV POETRY_NO_INTERACTION=1\nRUN pip install poetry==1.6.1\nRUN sh -c apt-get update && apt-get install -y tesseract-ocr=5.3.0-2 poppler-utils=22.12.0-2+b1\nRUN mkdir -p 755 /usr/share/nltk_data"
},
{
"version": "1.2.1",
"changelog_entry": "Upgrade to Python 3.9.19 + update pip and setuptools",
Expand Down

0 comments on commit c1d53e3

Please sign in to comment.