From 08104d6cf907208a3d406a0da1e7816417052137 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 6 Sep 2019 15:19:19 -0700 Subject: [PATCH] SDK - Containers - Build python container image based on current working directory (#1970) * SDK - Containers - Build container image from current environment * Removed the ability to capture the active python environment (as requested by @hongye-sun) * Added the type hint and docstring for the return type. * Renamed `build_image_from_env` function to `build_image_from_working_dir` as requested by @hongye-sun * Explained the function behavior in the documentation. * Removed extra empty line * Improved caching by copying python files only after installing python packages * Made test more portable * Added support for specifying the base_image `kfp.containers.default_base_image = ...` The image can also be a callable returning the image name. * Renamed `get_python_image` to `get_python_image_for_current_version` * Switched the default base image to Google Deep Learning container image as requested by @hongye-sun The size of this image is 4.35GB which really concerns me. The GPU image size is 6.45GB. 
* Stopped importing kfp.containers.* into kfp.* * Fixed test * Fixed the regex string * Fixed the type annotation style * Addressed @hongye-sun feedback * Removed the container image size warning * Fixed import failure --- sdk/python/kfp/__init__.py | 2 +- sdk/python/kfp/compiler/_container_builder.py | 4 +- sdk/python/kfp/containers/__init__.py | 14 ++ sdk/python/kfp/containers/_build_image_api.py | 120 ++++++++++++++++++ sdk/python/setup.py | 1 + sdk/python/tests/containers/__init__.py | 0 .../tests/containers/test_build_image_api.py | 84 ++++++++++++ 7 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 sdk/python/kfp/containers/__init__.py create mode 100644 sdk/python/kfp/containers/_build_image_api.py create mode 100644 sdk/python/tests/containers/__init__.py create mode 100644 sdk/python/tests/containers/test_build_image_api.py diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 191db7d2abe..9ceced192e2 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -15,4 +15,4 @@ from ._client import Client from ._config import * -from ._runners import * \ No newline at end of file +from ._runners import * diff --git a/sdk/python/kfp/compiler/_container_builder.py b/sdk/python/kfp/compiler/_container_builder.py index d8a9f785d12..bf85dfde21c 100644 --- a/sdk/python/kfp/compiler/_container_builder.py +++ b/sdk/python/kfp/compiler/_container_builder.py @@ -152,11 +152,11 @@ def _wrap_dir_in_tarball(self, tarball_path, dir_name): with tarfile.open(tarball_path, 'w:gz') as tarball: tarball.add(dir_name, arcname='') - def build(self, local_dir, docker_filename, target_image=None, timeout=1000): + def build(self, local_dir, docker_filename : str = 'Dockerfile', target_image=None, timeout=1000): """ Args: local_dir (str): local directory that stores all the necessary build files - docker_filename (str): the dockerfile name that is in the local_dir + docker_filename (str): the path of the Dockerfile relative to the 
local_dir target_image (str): the target image tag to push the final image. timeout (int): time out in seconds. Default: 1000 """ diff --git a/sdk/python/kfp/containers/__init__.py b/sdk/python/kfp/containers/__init__.py new file mode 100644 index 00000000000..b883dcb1aa1 --- /dev/null +++ b/sdk/python/kfp/containers/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the speci + +from ._build_image_api import * diff --git a/sdk/python/kfp/containers/_build_image_api.py b/sdk/python/kfp/containers/_build_image_api.py new file mode 100644 index 00000000000..4477f6230ad --- /dev/null +++ b/sdk/python/kfp/containers/_build_image_api.py @@ -0,0 +1,120 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the speci
+
+__all__ = [
+    'build_image_from_working_dir',
+]
+
+
+import logging
+import os
+import re
+import shutil
+import sys
+import tempfile
+
+import requests
+
+from ..compiler._container_builder import ContainerBuilder
+
+
+default_base_image = 'gcr.io/deeplearning-platform-release/tf-cpu.1-14'
+
+
+_container_work_dir = '/python_env'
+
+
+_default_image_builder = None
+
+
+def _get_default_image_builder():
+    '''Returns the module-wide ContainerBuilder, creating it on first use.'''
+    global _default_image_builder
+    if _default_image_builder is None:
+        _default_image_builder = ContainerBuilder()
+    return _default_image_builder
+
+
+def _generate_dockerfile_text(context_dir: str, dockerfile_path: str, base_image: str = None) -> str:
+    '''Returns Dockerfile text: FROM base image, WORKDIR, optional requirements.txt install, then COPY of the context. dockerfile_path is currently unused.'''
+    logging.info('Generating the Dockerfile')
+
+    requirements_rel_path = 'requirements.txt'
+    requirements_path = os.path.join(context_dir, requirements_rel_path)
+    requirements_file_exists = os.path.exists(requirements_path)
+
+    base_image = base_image or default_base_image
+    if callable(base_image):
+        base_image = base_image()
+
+    dockerfile_lines = []
+    dockerfile_lines.append('FROM {}'.format(base_image))
+    dockerfile_lines.append('WORKDIR {}'.format(_container_work_dir))
+    if requirements_file_exists:
+        dockerfile_lines.append('COPY {} .'.format(requirements_rel_path))
+        dockerfile_lines.append('RUN python3 -m pip install -r {}'.format(requirements_rel_path))
+    dockerfile_lines.append('COPY . .')
+
+    return '\n'.join(dockerfile_lines)
+
+
+def build_image_from_working_dir(image_name: str = None, working_dir: str = None, file_filter_re: str = r'.*\.py', timeout: int = 1000, base_image: str = None, builder: ContainerBuilder = None) -> str:
+    '''build_image_from_working_dir builds and pushes a new container image that captures the current python working directory.
+    This function recursively scans the working directory and captures the following files in the container image context:
+        * requirements.txt files
+        * all python files (can be overridden by passing a different `file_filter_re` argument)
+
+    The function generates a Dockerfile that starts from the base image, installs packages from requirements.txt (if present) and copies all the captured files to the container image.
+    The Dockerfile can be overridden by placing a custom Dockerfile in the root of the working directory; in that case passing `base_image` raises ValueError.
+    Args:
+        image_name: Optional. The image repo name where the new container image will be pushed. The name will be generated if not set.
+        working_dir: Optional. The directory that will be captured. The current directory will be used if omitted.
+        file_filter_re: Optional. A regular expression that will be used to decide which files to include in the container building context. It is applied to each file's base name with `re.match`, so it is anchored at the start of the name only.
+        timeout: Optional. The image building timeout in seconds.
+        base_image: Optional. The container image to use as the base for the new image. If not set, the Google Deep Learning Tensorflow CPU image will be used.
+        builder: Optional. An instance of ContainerBuilder or compatible class that will be used to build the image.
+    Returns:
+        The full name of the container image including the hash digest. E.g. gcr.io/my-org/my-image@sha256:86c1...793c.
+    '''
+    current_dir = working_dir or os.getcwd()
+    with tempfile.TemporaryDirectory() as context_dir:
+        logging.info('Creating the build context directory: {}'.format(context_dir))
+
+        # Copying all *.py and requirements.txt files
+        for dirpath, dirnames, filenames in os.walk(current_dir):
+            dst_dirpath = os.path.join(context_dir, os.path.relpath(dirpath, current_dir))
+            os.makedirs(dst_dirpath, exist_ok=True)
+            for file_name in filenames:
+                if re.match(file_filter_re, file_name) or file_name == 'requirements.txt':
+                    src_path = os.path.join(dirpath, file_name)
+                    dst_path = os.path.join(dst_dirpath, file_name)
+                    shutil.copy(src_path, dst_path)
+
+        src_dockerfile_path = os.path.join(current_dir, 'Dockerfile')
+        dst_dockerfile_path = os.path.join(context_dir, 'Dockerfile')
+        if os.path.exists(src_dockerfile_path):
+            if base_image:
+                raise ValueError('Cannot specify base_image when using custom Dockerfile (which already specifies the base image).')
+            shutil.copy(src_dockerfile_path, dst_dockerfile_path)
+        else:
+            dockerfile_text = _generate_dockerfile_text(context_dir, dst_dockerfile_path, base_image)
+            with open(dst_dockerfile_path, 'w') as f:
+                f.write(dockerfile_text)
+
+        if builder is None:
+            builder = _get_default_image_builder()
+        return builder.build(
+            local_dir=context_dir,
+            target_image=image_name,
+            timeout=timeout,
+        )
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index f30003b68d3..17764b67966 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -51,6 +51,7 @@
         'kfp.components',
         'kfp.components.structures',
         'kfp.components.structures.kubernetes',
+        'kfp.containers',
         'kfp.dsl',
         'kfp.notebook',
     ],
diff --git a/sdk/python/tests/containers/__init__.py b/sdk/python/tests/containers/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/sdk/python/tests/containers/test_build_image_api.py b/sdk/python/tests/containers/test_build_image_api.py
new file mode 100644
index 00000000000..e5d8e2c0da3
--- /dev/null
+++
b/sdk/python/tests/containers/test_build_image_api.py
@@ -0,0 +1,88 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the speci
+
+import os
+import re
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+from typing import Callable
+
+import mock
+
+from kfp.containers import build_image_from_working_dir
+
+
+class MockImageBuilder:
+    '''Builder stand-in that runs caller-supplied checks on the build context instead of building an image.'''
+    def __init__(self, dockerfile_text_check: Callable[[str], None] = None, requirements_text_check: Callable[[str], None] = None, file_paths_check: Callable[[set], None] = None):
+        self.dockerfile_text_check = dockerfile_text_check
+        self.requirements_text_check = requirements_text_check
+        self.file_paths_check = file_paths_check
+
+    def build(self, local_dir=None, target_image=None, timeout=1000):
+        '''Runs the configured checks against the files in local_dir and returns target_image unchanged.'''
+        if self.dockerfile_text_check:
+            actual_dockerfile_text = (Path(local_dir) / 'Dockerfile').read_text()
+            self.dockerfile_text_check(actual_dockerfile_text)
+        if self.requirements_text_check:
+            actual_requirements_text = (Path(local_dir) / 'requirements.txt').read_text()
+            self.requirements_text_check(actual_requirements_text)
+        if self.file_paths_check:
+            file_paths = set(os.path.relpath(os.path.join(dirpath, file_name), local_dir) for dirpath, dirnames, filenames in os.walk(local_dir) for file_name in filenames)
+            self.file_paths_check(file_paths)
+        return target_image
+
+
+class BuildImageApiTests(unittest.TestCase):
+    def test_build_image_from_working_dir(self):
+        '''Checks the generated Dockerfile, the captured files and the returned image name.'''
+        expected_dockerfile_text_re = '''
+FROM python:3.6.5
+WORKDIR /.*
+COPY requirements.txt .
+RUN python3 -m pip install -r requirements.txt
+COPY . .
+'''
+        with tempfile.TemporaryDirectory() as context_dir:
+            requirements_text = 'pandas==1.24'
+            requirements_txt_relpath = Path('.') / 'requirements.txt'
+            file1_py_relpath = Path('.') / 'lib' / 'file1.py'
+            file1_sh_relpath = Path('.') / 'lib' / 'file1.sh'
+
+            context_path = Path(context_dir)
+            (context_path / requirements_txt_relpath).write_text(requirements_text)
+            (context_path / file1_py_relpath).parent.mkdir(parents=True, exist_ok=True)
+            (context_path / file1_py_relpath).write_text('#py file')
+            (context_path / file1_sh_relpath).parent.mkdir(parents=True, exist_ok=True)
+            (context_path / file1_sh_relpath).write_text('#sh file')
+            expected_file_paths = {
+                'Dockerfile',
+                str(requirements_txt_relpath),
+                str(file1_py_relpath),
+            }
+            def dockerfile_text_check(actual_dockerfile_text):
+                self.assertRegex(actual_dockerfile_text.strip(), expected_dockerfile_text_re.strip())
+            def requirements_text_check(actual_requirements_text):
+                self.assertEqual(actual_requirements_text.strip(), requirements_text.strip())
+            def file_paths_check(file_paths):
+                self.assertEqual(file_paths, expected_file_paths)
+
+            image_name = 'gcr.io/my-org/my-image'
+            builder = MockImageBuilder(dockerfile_text_check, requirements_text_check, file_paths_check)
+            result = build_image_from_working_dir(image_name=image_name, working_dir=context_dir, base_image='python:3.6.5', builder=builder)
+            self.assertEqual(result, image_name)
+
+if __name__ == '__main__':
+    unittest.main()