Skip to content

Commit

Permalink
SDK - Containers - Build python container image based on current work…
Browse files Browse the repository at this point in the history
…ing directory (#1970)

* SDK - Containers - Build container image from current environment

* Removed the ability to capture the active python environment (as requested by @hongye-sun)

* Added the type hint and docstring for the return type.

* Renamed `build_image_from_env` function to `build_image_from_working_dir`
as requested by @hongye-sun

* Explained the function behavior in the documentation.

* Removed extra empty line

* Improved caching by copying python files only after installing python packages

* Made test more portable

* Added support for specifying the base_image
`kfp.containers.default_base_image = ...`
The image can also be a callable returning the image name.

* Renamed `get_python_image` to `get_python_image_for_current_version`

* Switched the default base image to Google Deep Learning container image as requested by @hongye-sun
The size of this image is 4.35GB which really concerns me. The GPU image size is 6.45GB.

* Stopped importing kfp.containers.* into kfp.*

* Fixed test

* Fixed the regex string

* Fixed the type annotation style

* Addressed @hongye-sun feedback

* Removed the container image size warning

* Fixed import failure
  • Loading branch information
Ark-kun authored and k8s-ci-robot committed Sep 6, 2019
1 parent 60018e3 commit 08104d6
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 3 deletions.
2 changes: 1 addition & 1 deletion sdk/python/kfp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

from ._client import Client
from ._config import *
from ._runners import *
from ._runners import *
4 changes: 2 additions & 2 deletions sdk/python/kfp/compiler/_container_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,11 @@ def _wrap_dir_in_tarball(self, tarball_path, dir_name):
with tarfile.open(tarball_path, 'w:gz') as tarball:
tarball.add(dir_name, arcname='')

def build(self, local_dir, docker_filename, target_image=None, timeout=1000):
def build(self, local_dir, docker_filename : str = 'Dockerfile', target_image=None, timeout=1000):
"""
Args:
local_dir (str): local directory that stores all the necessary build files
docker_filename (str): the dockerfile name that is in the local_dir
docker_filename (str): the path of the Dockerfile relative to the local_dir
target_image (str): the target image tag to push the final image.
timeout (int): time out in seconds. Default: 1000
"""
Expand Down
14 changes: 14 additions & 0 deletions sdk/python/kfp/containers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ._build_image_api import *
120 changes: 120 additions & 0 deletions sdk/python/kfp/containers/_build_image_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from ._build_image_api import *`; only the public
# image-building entry point is listed.
__all__ = [
'build_image_from_working_dir',
]


import logging
import os
import re
import shutil
import sys
import tempfile

import requests

from ..compiler._container_builder import ContainerBuilder


# Base image used for generated Dockerfiles when the caller passes no
# `base_image`. It may be reassigned to another image name, or to a callable
# that returns the image name (the generator calls it if it is callable).
default_base_image = 'gcr.io/deeplearning-platform-release/tf-cpu.1-14'


# Directory emitted as the WORKDIR instruction of generated Dockerfiles.
_container_work_dir = '/python_env'


# Cache slot for the default builder; populated on first use by
# `_get_default_image_builder()`.
_default_image_builder = None


def _get_default_image_builder():
    """Return the module-wide default image builder, creating it on first use.

    The instance is stored in the module-level ``_default_image_builder`` so
    that later calls reuse the same ``ContainerBuilder`` object.

    Returns:
        The cached ``ContainerBuilder`` instance.
    """
    global _default_image_builder
    if _default_image_builder is None:
        # Constructed only when a default builder is actually requested.
        from ..compiler._container_builder import ContainerBuilder
        _default_image_builder = ContainerBuilder()
    # Without this return the function implicitly yielded None, so callers
    # that relied on the default builder failed on `builder.build(...)`.
    return _default_image_builder


def _generate_dockerfile_text(context_dir: str, dockerfile_path: str, base_image: str = None) -> str:
    """Compose the text of a Dockerfile for the given build context.

    Args:
        context_dir: Build context directory. It is only inspected for a
            top-level ``requirements.txt`` file.
        dockerfile_path: Accepted for interface compatibility; not read by
            this function.
        base_image: Image name for the FROM instruction, or a callable
            returning it. When falsy, the module-level ``default_base_image``
            is used instead (and called if it is itself callable).

    Returns:
        The Dockerfile instructions joined by newlines, with no trailing
        newline.
    """
    logging.info('Generating the Dockerfile')

    requirements_name = 'requirements.txt'
    has_requirements = os.path.exists(os.path.join(context_dir, requirements_name))

    image = base_image or default_base_image
    if callable(image):
        image = image()

    instructions = [
        'FROM {}'.format(image),
        'WORKDIR {}'.format(_container_work_dir),
    ]
    if has_requirements:
        # Install dependencies before copying the sources so that the package
        # layer is emitted ahead of the (more frequently changing) code layer.
        instructions += [
            'COPY {} .'.format(requirements_name),
            'RUN python3 -m pip install -r {}'.format(requirements_name),
        ]
    instructions.append('COPY . .')

    return '\n'.join(instructions)


def build_image_from_working_dir(image_name: str = None, working_dir: str = None, file_filter_re: str = r'.*\.py', timeout: int = 1000, base_image: str = None, builder: ContainerBuilder = None) -> str:
    '''Builds and pushes a container image that captures a python working directory.

    The working directory is scanned recursively and the following files are
    copied into a temporary build context, preserving their relative paths:
    * files named requirements.txt
    * files whose name matches `file_filter_re` (python files by default)

    If the root of the working directory contains a file named Dockerfile, it
    is used as is. Otherwise a Dockerfile is generated that starts from the
    base image, installs the packages from the top-level requirements.txt
    (if present) and then copies the captured files into the image.

    Args:
        image_name: Optional. The image repo name where the new container image will be pushed. Passed to the builder as `target_image`; the builder is expected to generate a name if it is not set.
        working_dir: Optional. The directory that will be captured. The current directory is used if omitted.
        file_filter_re: Optional. A regular expression that decides which files to include in the build context. NOTE: it is applied to the bare file name with `re.match`, which anchors only at the start, so the default pattern also matches names such as `module.pyc`.
        timeout: Optional. The image building timeout in seconds.
        base_image: Optional. The container image to use as the base for the new image. If not set, `default_base_image` is used. Must not be combined with a custom Dockerfile.
        builder: Optional. An instance of ContainerBuilder or a compatible class that will be used to build the image. The default builder is used if omitted.

    Returns:
        The value returned by the builder's `build` call; expected to be the full name of the container image including the hash digest. E.g. gcr.io/my-org/my-image@sha256:86c1...793c.

    Raises:
        ValueError: If `base_image` is given while the working directory provides its own Dockerfile.
    '''
    source_root = working_dir or os.getcwd()
    with tempfile.TemporaryDirectory() as context_dir:
        logging.info('Creating the build context directory: {}'.format(context_dir))

        # Mirror the directory tree into the context, copying only the
        # selected files (every directory is recreated, even if it ends up empty).
        for src_dir, _subdirs, file_names in os.walk(source_root):
            relative_dir = os.path.relpath(src_dir, source_root)
            dst_dir = os.path.join(context_dir, relative_dir)
            os.makedirs(dst_dir, exist_ok=True)
            for name in file_names:
                wanted = name == 'requirements.txt' or re.match(file_filter_re, name)
                if not wanted:
                    continue
                shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))

        custom_dockerfile = os.path.join(source_root, 'Dockerfile')
        context_dockerfile = os.path.join(context_dir, 'Dockerfile')
        if not os.path.exists(custom_dockerfile):
            generated_text = _generate_dockerfile_text(context_dir, context_dockerfile, base_image)
            with open(context_dockerfile, 'w') as dockerfile:
                dockerfile.write(generated_text)
        elif base_image:
            # A custom Dockerfile already names its own base image.
            raise ValueError('Cannot specify base_image when using custom Dockerfile (which already specifies the base image).')
        else:
            shutil.copy(custom_dockerfile, context_dockerfile)

        image_builder = _get_default_image_builder() if builder is None else builder
        return image_builder.build(
            local_dir=context_dir,
            target_image=image_name,
            timeout=timeout,
        )
1 change: 1 addition & 0 deletions sdk/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
'kfp.components',
'kfp.components.structures',
'kfp.components.structures.kubernetes',
'kfp.containers',
'kfp.dsl',
'kfp.notebook',
],
Expand Down
Empty file.
84 changes: 84 additions & 0 deletions sdk/python/tests/containers/test_build_image_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import sys
import tempfile
import unittest
from pathlib import Path
from typing import Callable

import mock

from kfp.containers import build_image_from_working_dir


class MockImageBuilder:
    """Stand-in image builder that inspects the build context instead of building.

    Each optional callback receives one aspect of the context directory handed
    to `build`; callbacks that are not provided are skipped.
    """

    def __init__(self, dockerfile_text_check: Callable[[str], None] = None, requirements_text_check: Callable[[str], None] = None, file_paths_check: Callable[[str], None] = None):
        """Store the verification callbacks.

        Args:
            dockerfile_text_check: Called with the text of `Dockerfile`.
            requirements_text_check: Called with the text of `requirements.txt`.
            file_paths_check: Called with the set of all file paths in the
                context, relative to the context root.
        """
        self.dockerfile_text_check = dockerfile_text_check
        self.requirements_text_check = requirements_text_check
        self.file_paths_check = file_paths_check

    def build(self, local_dir=None, target_image=None, timeout=1000):
        """Run the configured checks against `local_dir` and echo `target_image`.

        Args:
            local_dir: The build context directory to inspect.
            target_image: Returned unchanged as the "built" image name.
            timeout: Accepted for interface compatibility; unused.

        Returns:
            `target_image`, exactly as passed in.
        """
        if self.dockerfile_text_check:
            dockerfile = Path(local_dir) / 'Dockerfile'
            self.dockerfile_text_check(dockerfile.read_text())

        if self.requirements_text_check:
            requirements = Path(local_dir) / 'requirements.txt'
            self.requirements_text_check(requirements.read_text())

        if self.file_paths_check:
            found_paths = set()
            for root, _subdirs, names in os.walk(local_dir):
                for name in names:
                    found_paths.add(os.path.relpath(os.path.join(root, name), local_dir))
            self.file_paths_check(found_paths)

        return target_image


class BuildImageApiTests(unittest.TestCase):
    """Tests for `build_image_from_working_dir`."""

    def test_build_image_from_working_dir(self):
        """A working dir with requirements.txt, a .py and a .sh file is captured correctly.

        Verifies the generated Dockerfile, the copied requirements.txt, the
        set of files placed in the build context (the .sh file must be
        filtered out), and that the builder's result is returned to the caller.
        """
        # Used as a regular expression; the WORKDIR path is deliberately left open.
        expected_dockerfile_text_re = '''
FROM python:3.6.5
WORKDIR /.*
COPY requirements.txt .
RUN python3 -m pip install -r requirements.txt
COPY . .
'''
        target_image = 'gcr.io/test-project/test-image'
        with tempfile.TemporaryDirectory() as context_dir:
            requirements_text = 'pandas==1.24'
            requirements_txt_relpath = Path('.') / 'requirements.txt'
            file1_py_relpath = Path('.') / 'lib' / 'file1.py'
            file1_sh_relpath = Path('.') / 'lib' / 'file1.sh'

            context_path = Path(context_dir)
            (context_path / requirements_txt_relpath).write_text(requirements_text)
            (context_path / file1_py_relpath).parent.mkdir(parents=True, exist_ok=True)
            (context_path / file1_py_relpath).write_text('#py file')
            (context_path / file1_sh_relpath).parent.mkdir(parents=True, exist_ok=True)
            (context_path / file1_sh_relpath).write_text('#sh file')
            expected_file_paths = {
                'Dockerfile',
                str(requirements_txt_relpath),
                str(file1_py_relpath),
            }

            def dockerfile_text_check(actual_dockerfile_text):
                self.assertRegex(actual_dockerfile_text.strip(), expected_dockerfile_text_re.strip())

            def requirements_text_check(actual_requirements_text):
                self.assertEqual(actual_requirements_text.strip(), requirements_text.strip())

            def file_paths_check(file_paths):
                self.assertEqual(file_paths, expected_file_paths)

            builder = MockImageBuilder(dockerfile_text_check, requirements_text_check, file_paths_check)
            result = build_image_from_working_dir(image_name=target_image, working_dir=context_dir, base_image='python:3.6.5', builder=builder)

            # The mock builder echoes the target image, so this fails if the
            # builder (and therefore the checks above) was never invoked or
            # its result was dropped.
            self.assertEqual(result, target_image)

# Run the tests when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 08104d6

Please sign in to comment.