diff --git a/data/ignore_ids_safety_scan.json b/data/ignore_ids_safety_scan.json
new file mode 100644
index 000000000000..bad59904bc00
--- /dev/null
+++ b/data/ignore_ids_safety_scan.json
@@ -0,0 +1,138 @@
+{
+    "tensorflow": {
+        "training": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "35015":"for shipping pycrypto<=2.6.1 - the last available version for py2"
+            },
+            "py3": {
+                "41161":"TF 2.6.0 is the last version for 2.6 series and does not have a fix yet."
+            }
+        },
+        "inference":{
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2"
+            },
+            "py3": {
+            }
+        },
+        "inference-eia": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2"
+            },
+            "py3": {
+            }
+        },
+        "inference-neuron":{
+            "_comment":"py2 is deprecated",
+            "py2": {
+            },
+            "py3": {
+                "39409":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches",
+                "39408":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches",
+                "39407":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches",
+                "39406":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches"
+            }
+        }
+    },
+    "mxnet": {
+        "training": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2"
+            },
+            "py3": {
+            }
+        },
+        "inference": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2"
+            },
+            "py3": {
+            }
+        },
+        "inference-eia": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+                "38449":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38450":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38451":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "38452":"for shipping pillow<=6.2.2 - the last available version for py2",
+                "36810":"numpy<=1.16.0 -- This has to only be here while we publish MXNet 1.4.1 EI DLC v1.0"
+            },
+            "py3": {
+            }
+        },
+        "inference-neuron": {
+            "_comment":"py2 is deprecated",
+            "py2": {
+            },
+            "py3": {
+                "40673":"for shipping tensorflow 1.15.5",
+                "40675":"for shipping tensorflow 1.15.5",
+                "40676":"for shipping tensorflow 1.15.5",
+                "40794":"for shipping tensorflow 1.15.5",
shipping tensorflow 1.15.5", + "40795":"for shipping tensorflow 1.15.5", + "40796":"for shipping tensorflow 1.15.5" + } + } + }, + "pytorch": { + "training": { + "_comment":"py2 is deprecated", + "py2": { + "35810":"for astropy<3.0.1", + "38449":"for shipping pillow<=6.2.2 - the last available version for py2", + "38450":"for shipping pillow<=6.2.2 - the last available version for py2", + "38451":"for shipping pillow<=6.2.2 - the last available version for py2", + "38452":"for shipping pillow<=6.2.2 - the last available version for py2" + }, + "py3": { + } + }, + "inference": { + "_comment":"py2 is deprecated", + "py2": { + }, + "py3": { + } + }, + "inference-eia": { + "_comment":"py2 is deprecated", + "py2": { + }, + "py3": { + } + }, + "inference-neuron": { + "_comment":"py2 is deprecated", + "py2": { + }, + "py3": { + "39409":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches", + "39408":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches", + "39407":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches", + "39406":"TF 1.15.5 is on par with TF 2.0.4, 2.1.3, 2.2.2, 2.3.2 in security patches" + } + } + } +} \ No newline at end of file diff --git a/miscellaneous_dockerfiles/Dockerfile.common b/miscellaneous_dockerfiles/Dockerfile.common new file mode 100644 index 000000000000..9a4968327ed3 --- /dev/null +++ b/miscellaneous_dockerfiles/Dockerfile.common @@ -0,0 +1,7 @@ +# Use the Deep Learning Container as a base Image +ARG PRE_PUSH_IMAGE="" + +FROM $PRE_PUSH_IMAGE + +# Copy safety report generated from PRE_PUSH_IMAGE to docker image +COPY safety_report.json /opt/aws/dlc/info/safety_report.json diff --git a/src/common_stage_image.py b/src/common_stage_image.py new file mode 100644 index 000000000000..bf59541b9cbd --- /dev/null +++ b/src/common_stage_image.py @@ -0,0 +1,69 @@ +""" +Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"). You +may not use this file except in compliance with the License. A copy of +the License is located at + + http://aws.amazon.com/apache2.0/ + +or in the "license" file accompanying this file. This file is +distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +ANY KIND, either express or implied. See the License for the specific +language governing permissions and limitations under the License. +""" + +from image import DockerImage +from context import Context +from utils import generate_safety_report_for_image, get_root_folder_path + +import os + + +class CommonStageImage(DockerImage): + """ + This class is especially designed to handle the build process for CommonStageImages. + All the functionality - either safety scan report, ecr scan report, etc. - that is especially + required to run the miscellaneous_dockerfiles/Dockerfile.common should go into this file. As of now, + this class takes care of generating a safety report from a pre_push_image and then uses this + safety report for creating a context for Dockerfile.common + """ + + def update_pre_build_configuration(self): + """ + Conducts all the pre-build configurations from the parent class and then conducts + Safety Scan on the images generated in previous stage builds. The safety scan generates + the safety_report which is then copied into the image. 
+ """ + # Call the update_pre_build_configuration steps from the parent class + super(CommonStageImage, self).update_pre_build_configuration() + # Generate safety scan report for the first stage image and add the file to artifacts + pre_push_stage_image_uri = self.build_args["PRE_PUSH_IMAGE"] + processed_image_uri = pre_push_stage_image_uri.replace(".", "-").replace("/", "-").replace(":", "-") + image_name = self.name + tarfile_name_for_context = f"{processed_image_uri}-{image_name}" + storage_file_path = os.path.join( + os.sep, get_root_folder_path(), "src", f"{tarfile_name_for_context}_safety_report.json" + ) + generate_safety_report_for_image( + pre_push_stage_image_uri, image_info=self.info, storage_file_path=storage_file_path + ) + self.context = self.generate_common_stage_context(storage_file_path, tarfile_name=tarfile_name_for_context) + + def generate_common_stage_context(self, safety_report_path, tarfile_name="common-stage-file"): + """ + For CommonStageImage, build context is built once the safety report is generated. This is because + the Dockerfile.common uses this safety report to COPY the report into the image. + """ + artifacts = { + "safety_report": {"source": safety_report_path, "target": "safety_report.json"}, + "dockerfile": { + "source": os.path.join( + os.sep, get_root_folder_path(), "miscellaneous_dockerfiles", "Dockerfile.common" + ), + "target": "Dockerfile", + }, + } + + artifact_root = os.path.join(os.sep, get_root_folder_path(), "src") + return Context(artifacts, context_path=f"build/{tarfile_name}.tar.gz", artifact_root=artifact_root) diff --git a/src/constants.py b/src/constants.py index e2508bda638e..7be641ef288a 100644 --- a/src/constants.py +++ b/src/constants.py @@ -31,6 +31,10 @@ # Left and right padding between text and margins in output PADDING = 1 +# Docker build stages +PRE_PUSH_STAGE = "pre_push" +COMMON_STAGE = "common" + # Docker connections DOCKER_URL = "unix://var/run/docker.sock" @@ -71,3 +75,7 @@ ECS_TESTS = "ecs" EKS_TESTS = "eks" ALL_TESTS = ["sagemaker", "ec2", "eks", "ecs"] + +# Timeout in seconds for Docker API client. +API_CLIENT_TIMEOUT = 600 +MAX_WORKER_COUNT_FOR_PUSHING_IMAGES = 3 diff --git a/src/image.py b/src/image.py index d0d9be2607f6..dcf0bd3ad85a 100644 --- a/src/image.py +++ b/src/image.py @@ -18,6 +18,11 @@ from docker import DockerClient import constants +import logging +import json + +LOGGER = logging.getLogger(__name__) +LOGGER.setLevel(logging.DEBUG) class DockerImage: @@ -25,9 +30,7 @@ class DockerImage: The DockerImage class has the functions and attributes for building the dockerimage """ - def __init__( - self, info, dockerfile, repository, tag, to_build, context=None, - ): + def __init__(self, info, dockerfile, repository, tag, to_build, stage, context=None, to_push=True, additional_tags=[]): # Meta-data about the image should go to info. 
diff --git a/src/image.py b/src/image.py
index d0d9be2607f6..dcf0bd3ad85a 100644
--- a/src/image.py
+++ b/src/image.py
@@ -18,6 +18,11 @@
 from docker import DockerClient
 
 import constants
+import logging
+import json
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.DEBUG)
 
 
 class DockerImage:
@@ -25,9 +30,7 @@ class DockerImage:
     The DockerImage class has the functions and attributes for building the dockerimage
     """
 
-    def __init__(
-        self, info, dockerfile, repository, tag, to_build, context=None,
-    ):
+    def __init__(self, info, dockerfile, repository, tag, to_build, stage, context=None, to_push=True, additional_tags=[]):
 
         # Meta-data about the image should go to info.
         # All keys in info are accessible as attributes
@@ -36,13 +39,16 @@ def __init__(
         self.summary = {}
         self.build_args = {}
         self.labels = {}
+        self.stage = stage
 
         self.dockerfile = dockerfile
         self.context = context
+        self.to_push = to_push
 
         # TODO: Add ability to tag image with multiple tags
         self.repository = repository
         self.tag = tag
+        self.additional_tags = additional_tags
         self.ecr_url = f"{self.repository}:{self.tag}"
 
         if not isinstance(to_build, bool):
@@ -50,8 +56,9 @@ def __init__(
         self.to_build = to_build
 
         self.build_status = None
-        self.client = APIClient(base_url=constants.DOCKER_URL)
+        self.client = APIClient(base_url=constants.DOCKER_URL, timeout=constants.API_CLIENT_TIMEOUT)
         self.log = []
+        self._corresponding_common_stage_image = None
 
     def __getattr__(self, name):
         return self.info[name]
@@ -67,6 +74,24 @@ def is_child_image(self):
     def is_test_promotion_enabled(self):
         return bool(self.info.get('enable_test_promotion'))
 
+    @property
+    def corresponding_common_stage_image(self):
+        """
+        Retrieve the corresponding common stage image for a given image.
+        """
+        return self._corresponding_common_stage_image
+
+    @corresponding_common_stage_image.setter
+    def corresponding_common_stage_image(self, docker_image_object):
+        """
+        For a pre-push stage image, sets the value of the corresponding_common_stage_image variable.
+        """
+        if self.to_push:
+            raise ValueError(
+                "For any pre-push stage image, a corresponding common stage image should only exist if the pre-push stage image is non-pushable."
+            )
+        self._corresponding_common_stage_image = docker_image_object
+
     def collect_installed_packages_information(self):
         """
         Returns an array with outcomes of the commands listed in the 'commands' array
@@ -80,18 +105,19 @@ def collect_installed_packages_information(self):
         docker_client.containers.prune()
         return command_responses
 
-    def build(self):
+    def get_tail_logs_in_pretty_format(self, number_of_lines=10):
         """
-        The build function builds the specified docker image
-        """
-        self.summary["start_time"] = datetime.now()
+        Returns the tail of the logs.
 
-        if not self.to_build:
-            self.log = ["Not built"]
-            self.build_status = constants.NOT_BUILT
-            self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
-            return self.build_status
+        :param number_of_lines: int, number of ending lines to be printed
+        :return: str, last number_of_lines of the logs concatenated with a new line
+        """
+        return "\n".join(self.log[-1][-number_of_lines:])
 
+    def update_pre_build_configuration(self):
+        """
+        Updates the image configuration before the docker client starts building the image.
+ """ if self.info.get("base_image_uri"): self.build_args["BASE_IMAGE"] = self.info["base_image_uri"] @@ -101,70 +127,182 @@ def build(self): if self.info.get("labels"): self.labels.update(self.info.get("labels")) - with open(self.context.context_path, "rb") as context_file: - response = [] - - for line in self.client.build( - fileobj=context_file, - path=self.dockerfile, - custom_context=True, - rm=True, - decode=True, - tag=self.ecr_url, - buildargs=self.build_args, - labels=self.labels - ): - if line.get("error") is not None: - self.context.remove() - response.append(line["error"]) - - self.log = response - self.build_status = constants.FAIL - self.summary["status"] = constants.STATUS_MESSAGE[self.build_status] - self.summary["end_time"] = datetime.now() - - return self.build_status - - if line.get("stream") is not None: - response.append(line["stream"]) - elif line.get("status") is not None: - response.append(line["status"]) - else: - response.append(str(line)) + def build(self): + """ + The build function sets the stage for starting the docker build process for a given image. + :return: int, Build Status + """ + self.summary["start_time"] = datetime.now() + + # Confirm if building the image is required or not + if not self.to_build: + self.log.append(["Not built"]) + self.build_status = constants.NOT_BUILT + self.summary["status"] = constants.STATUS_MESSAGE[self.build_status] + return self.build_status + + # Conduct some preprocessing before building the image + self.update_pre_build_configuration() + + # Start building the image + with open(self.context.context_path, "rb") as context_file: + self.docker_build(fileobj=context_file, custom_context=True) self.context.remove() - self.summary["image_size"] = int( - self.client.inspect_image(self.ecr_url)["Size"] - ) / (1024 * 1024) - if self.summary["image_size"] > self.info["image_size_baseline"] * 1.20: - response.append("Image size baseline exceeded") - response.append(f"{self.summary['image_size']} > 1.2 * {self.info['image_size_baseline']}") - response += self.collect_installed_packages_information() - self.build_status = constants.FAIL_IMAGE_SIZE_LIMIT + if self.build_status != constants.SUCCESS: + return self.build_status + + if not self.to_push: + # If this image is not supposed to be pushed, in that case, we are already done + # with building the image and do not need to conduct any further processing. + self.summary["end_time"] = datetime.now() + + # check the size after image is built. + self.image_size_check() + + # This return is necessary. Otherwise FORMATTER fails while displaying the status. + return self.build_status + + def docker_build(self, fileobj=None, custom_context=False): + """ + Uses low level Docker API Client to actually start the process of building the image. + + :param fileobj: FileObject, a readable file-like object pointing to the context tarfile. 
+    def docker_build(self, fileobj=None, custom_context=False):
+        """
+        Uses the low-level Docker API client to actually start the process of building the image.
+
+        :param fileobj: FileObject, a readable file-like object pointing to the context tarfile.
+        :param custom_context: bool
+        :return: int, Build Status
+        """
+        response = [f"Starting the Build Process for {self.repository}:{self.tag}"]
+        for line in self.client.build(
+            fileobj=fileobj,
+            path=self.dockerfile,
+            custom_context=custom_context,
+            rm=True,
+            decode=True,
+            tag=self.ecr_url,
+            buildargs=self.build_args,
+            labels=self.labels,
+        ):
+            if line.get("error") is not None:
+                response.append(line["error"])
+                self.log.append(response)
+                self.build_status = constants.FAIL
+                self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
+                self.summary["end_time"] = datetime.now()
+
+                LOGGER.info(f"Docker Build Logs: \n {self.get_tail_logs_in_pretty_format(100)}")
+                LOGGER.error("ERROR during Docker BUILD")
+                LOGGER.error(f"Error message received for {self.dockerfile} while docker build: {line}")
+
+                return self.build_status
+
+            if line.get("stream") is not None:
+                response.append(line["stream"])
+            elif line.get("status") is not None:
+                response.append(line["status"])
+            else:
+                response.append(str(line))
+
+        self.log.append(response)
+
+        LOGGER.info(f"DOCKER BUILD LOGS: \n{self.get_tail_logs_in_pretty_format()}")
+        LOGGER.info(f"Completed Build for {self.repository}:{self.tag}")
+
+        self.build_status = constants.SUCCESS
+        return self.build_status
+
+    def image_size_check(self):
+        """
+        Checks that the size of the image does not exceed the baseline.
+
+        :return: int, Build Status
+        """
+        response = [f"Starting image size check for {self.repository}:{self.tag}"]
+        self.summary["image_size"] = int(self.client.inspect_image(self.ecr_url)["Size"]) / (1024 * 1024)
+        if self.summary["image_size"] > self.info["image_size_baseline"] * 1.20:
+            response.append("Image size baseline exceeded")
+            response.append(f"{self.summary['image_size']} > 1.2 * {self.info['image_size_baseline']}")
+            response += self.collect_installed_packages_information()
+            self.build_status = constants.FAIL_IMAGE_SIZE_LIMIT
+        else:
+            response.append(f"Image Size Check Succeeded for {self.repository}:{self.tag}")
+            self.build_status = constants.SUCCESS
+        self.log.append(response)
+
+        LOGGER.info(f"{self.get_tail_logs_in_pretty_format()}")
+
+        return self.build_status
+
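The 1.20 factor gives every image 20% headroom over its recorded baseline before the build is failed. With made-up numbers:

```python
# Sketch: the size gate with hypothetical values (MB).
image_size = 4700                    # inspect_image(...)["Size"] / (1024 * 1024)
image_size_baseline = 3800           # info["image_size_baseline"]
assert image_size > image_size_baseline * 1.20   # 4700 > 4560 -> FAIL_IMAGE_SIZE_LIMIT
```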
+    def push_image(self, tag_value=None):
+        """
+        Pushes the Docker image to ECR using the low-level Docker API client.
+
+        :param tag_value: str, an optional variable to provide a different tag
+        :return: int, states if the Push was successful or not
+        """
+        tag = tag_value
+        if tag_value is None:
+            tag = self.tag
+
+        response = [f"Starting image Push for {self.repository}:{tag}"]
+        for line in self.client.push(self.repository, tag, stream=True, decode=True):
+            if line.get("error") is not None:
+                response.append(line["error"])
+                self.log.append(response)
+                self.build_status = constants.FAIL
+                self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
+                self.summary["end_time"] = datetime.now()
+
+                LOGGER.info(f"Docker Push Logs: \n {self.get_tail_logs_in_pretty_format(100)}")
+                LOGGER.error("ERROR during Docker PUSH")
+                LOGGER.error(f"Error message received for {self.repository}:{tag} while docker push: {line}")
+
+                return self.build_status
+            if line.get("stream") is not None:
+                response.append(line["stream"])
+            else:
+                response.append(str(line))
+
+        self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
+        self.summary["end_time"] = datetime.now()
+        self.summary["ecr_url"] = self.ecr_url
+        if "pushed_uris" not in self.summary:
+            self.summary["pushed_uris"] = []
+        self.summary["pushed_uris"].append(f"{self.repository}:{tag}")
+        response.append(f"Completed Push for {self.repository}:{tag}")
+        self.log.append(response)
+
+        LOGGER.info(f"DOCKER PUSH LOGS: \n {self.get_tail_logs_in_pretty_format(2)}")
+        return self.build_status
+
+    def push_image_with_additional_tags(self):
+        """
+        Pushes an already built Docker image by applying additional tags to it.
+
+        :return: int, states if the Push was successful or not
+        """
+        self.log.append([f"Started Tagging for {self.ecr_url}"])
+        for additional_tag in self.additional_tags:
+            response = [f"Tagging {self.ecr_url} as {self.repository}:{additional_tag}"]
+            tagging_successful = self.client.tag(self.ecr_url, self.repository, additional_tag)
+            if not tagging_successful:
+                response.append(f"Tagging {self.ecr_url} with {additional_tag} unsuccessful.")
+                self.log.append(response)
+                LOGGER.error("ERROR during Tagging")
+                LOGGER.error(f"Tagging {self.ecr_url} with {additional_tag} unsuccessful.")
+                self.build_status = constants.FAIL
+                self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
+                return self.build_status
+            response.append(f"Tagged {self.ecr_url} successfully as {self.repository}:{additional_tag}")
+            self.log.append(response)
+
+            self.build_status = self.push_image(tag_value=additional_tag)
+            if self.build_status != constants.SUCCESS:
+                return self.build_status
+
+        self.summary["status"] = constants.STATUS_MESSAGE[self.build_status]
+        self.summary["end_time"] = datetime.now()
+        self.log.append([f"Completed Tagging for {self.ecr_url}"])
+
+        LOGGER.info(f"DOCKER TAG and PUSH LOGS: \n {self.get_tail_logs_in_pretty_format(5)}")
+        return self.build_status
diff --git a/src/image_builder.py b/src/image_builder.py
index 752c7fd1254a..d1017b914f87 100644
--- a/src/image_builder.py
+++ b/src/image_builder.py
@@ -21,14 +21,19 @@
 import constants
 import utils
 
+import boto3
+import itertools
+
 from context import Context
 from metrics import Metrics
 from image import DockerImage
+from common_stage_image import CommonStageImage
 from buildspec import Buildspec
 from output import OutputFormatter
 from config import parse_dlc_developer_configs
 
+FORMATTER = OutputFormatter(constants.PADDING)
+build_context = os.getenv("BUILD_CONTEXT")
+
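For orientation before the builder changes: the tag scheme introduced by this PR, shown with a hypothetical tag (append_tag is defined near the end of this file):

```python
# Sketch: the tag lifecycle for a hypothetical image_tag.
image_tag = "2.6.0-gpu-py38-ubuntu20.04"
pre_push_tag = f"{image_tag}-pre-push"          # first-stage build, normally not pushed
common_tag = f"{image_tag}-multistage-common"   # adds /opt/aws/dlc/info/safety_report.json
# Whichever image is pushed is finally retagged with the plain image_tag through
# push_image_with_additional_tags(), so consumers keep the familiar tag.
```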
 def _find_image_object(images_list, image_name):
 
 # TODO: Abstract away to ImageBuilder class
 def image_builder(buildspec):
-    FORMATTER = OutputFormatter(constants.PADDING)
     BUILDSPEC = Buildspec()
     BUILDSPEC.load(buildspec)
-    IMAGES = []
+    PRE_PUSH_STAGE_IMAGES = []
+    COMMON_STAGE_IMAGES = []
 
     if "huggingface" in str(BUILDSPEC["framework"]) or "autogluon" in str(BUILDSPEC["framework"]):
         os.system("echo login into public ECR")
@@ -71,7 +76,6 @@ def image_builder(buildspec):
         if image_config.get("context") is not None:
             ARTIFACTS.update(image_config["context"])
 
-        build_context = os.getenv("BUILD_CONTEXT")
         image_tag = (
             tag_image_with_pr_number(image_config["tag"])
             if build_context == "PR"
@@ -86,7 +90,7 @@ def image_builder(buildspec):
         )
         base_image_uri = None
         if image_config.get("base_image_name") is not None:
-            base_image_object = _find_image_object(IMAGES, image_config["base_image_name"])
+            base_image_object = _find_image_object(PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"])
             base_image_uri = base_image_object.ecr_url
 
         if image_config.get("download_artifacts") is not None:
@@ -155,110 +159,270 @@ def image_builder(buildspec):
             "labels": labels,
             "extra_build_args": extra_build_args
         }
-
-        image_object = DockerImage(
+
+        # Create pre_push stage docker object
+        pre_push_stage_image_object = DockerImage(
             info=info,
             dockerfile=image_config["docker_file"],
             repository=image_repo_uri,
-            tag=image_tag,
+            tag=append_tag(image_tag, "pre-push"),
             to_build=image_config["build"],
+            stage=constants.PRE_PUSH_STAGE,
             context=context,
+            additional_tags=[image_tag],
         )
-        IMAGES.append(image_object)
+        ##### Create Common stage docker object #####
+        # If for a pre_push stage image we create a common stage image, then we do not push the pre_push stage image
+        # to the repository. Instead, we just push its common stage image to the repository. Therefore,
+        # inside the function generate_common_stage_image_object we make pre_push_stage_image_object non-pushable.
+        # common_stage_image_object = generate_common_stage_image_object(pre_push_stage_image_object, image_tag)
+        # COMMON_STAGE_IMAGES.append(common_stage_image_object)
+
+        PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
 
     FORMATTER.separator()
     FORMATTER.banner("DLC")
-    FORMATTER.title("Status")
 
-    THREADS = {}
+    # Standard images must be built before example images
+    # Example images will use standard images as base
+    standard_images = [image for image in PRE_PUSH_STAGE_IMAGES if "example" not in image.name.lower()]
+    example_images = [image for image in PRE_PUSH_STAGE_IMAGES if "example" in image.name.lower()]
+    ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
+    IMAGES_TO_PUSH = [image for image in ALL_IMAGES if image.to_push and image.to_build]
 
-    # In the context of the ThreadPoolExecutor each instance of image.build submitted
-    # to it is executed concurrently in a separate thread.
-    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-        # Parent images do not inherit from any containers built in this job
-        # Child images use one of the parent images as their base image
-        parent_images = [image for image in IMAGES if not image.is_child_image]
-        child_images = [image for image in IMAGES if image.is_child_image]
+    pushed_images = []
+    pushed_images += process_images(standard_images, "Standard")
+    pushed_images += process_images(example_images, "Example")
 
-        for image in parent_images:
-            THREADS[image.name] = executor.submit(image.build)
+    assert all(image in pushed_images for image in IMAGES_TO_PUSH), "Some images could not be pushed."
-        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
-        FORMATTER.progress(THREADS)
+    # After the build, display logs/summary for all the images.
+    FORMATTER.banner("Summary")
+    show_build_info(ALL_IMAGES)
 
-        for image in child_images:
-            THREADS[image.name] = executor.submit(image.build)
+    FORMATTER.banner("Errors")
+    is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(ALL_IMAGES)
 
-        # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
-        FORMATTER.progress(THREADS)
+    # From all images, filter the images that were supposed to be built and upload their metrics
+    BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]
 
-    FORMATTER.title("Build Logs")
+    FORMATTER.banner("Upload Metrics")
+    upload_metrics(BUILT_IMAGES, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit)
 
-    if not os.path.isdir("logs"):
-        os.makedirs("logs")
+    FORMATTER.banner("Test Env")
+    # Set environment variables to be consumed by test jobs
+    test_trigger_job = utils.get_codebuild_project_name()
+    # Tests should only run on images that were pushed to the repository
+    utils.set_test_env(
+        IMAGES_TO_PUSH,
+        use_latest_additional_tag=True,
+        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
+        TEST_TRIGGER=test_trigger_job
+    )
 
-    for image in IMAGES:
-        FORMATTER.title(image.name)
-        FORMATTER.table(image.info.items())
-        FORMATTER.separator()
-        FORMATTER.print_lines(image.log)
-        with open(f"logs/{image.name}", "w") as fp:
-            fp.write("/n".join(image.log))
-            image.summary["log"] = f"logs/{image.name}"
-
-    FORMATTER.title("Summary")
 
+
+def process_images(pre_push_image_list, pre_push_image_type="Pre-push"):
+    """
+    Handles all the tasks related to a particular type of pre-push images. It takes in the list of
+    pre-push images and builds them. After the pre-push images have been built, it extracts the
+    corresponding common stage images for the pre-push images and builds those common stage images.
+    After the common stage images have been built, it finds out the docker images that need to be
+    pushed and pushes them accordingly.
+
+    Note that the common stage images should always be built after the pre-push images of a
+    particular kind. This is because the common stage images are built on top of the respective
+    Standard and Example images.
+
+    :param pre_push_image_list: list[DockerImage], list of pre-push images
+    :param pre_push_image_type: str, used to display the message on the logs
+    :return: list[DockerImage], images that were supposed to be pushed.
+    """
+    FORMATTER.banner(f"{pre_push_image_type} Build")
+    build_images(pre_push_image_list)
+
+    FORMATTER.banner(f"{pre_push_image_type} Common Build")
+    common_stage_image_list = [
+        image.corresponding_common_stage_image
+        for image in pre_push_image_list
+        if image.corresponding_common_stage_image is not None
+    ]
+    build_images(common_stage_image_list, make_dummy_boto_client=True)
+
+    FORMATTER.banner(f"{pre_push_image_type} Push Images")
+    all_images = pre_push_image_list + common_stage_image_list
+    images_to_push = [image for image in all_images if image.to_push and image.to_build]
+    push_images(images_to_push)
+
+    FORMATTER.banner(f"{pre_push_image_type} Retagging")
+    retag_and_push_images(images_to_push)
+    return images_to_push
 
-    for image in IMAGES:
+
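generate_common_stage_image_object, defined next, is what keeps the two stages consistent. A sketch of the invariants it establishes, assuming a hypothetical pre_push object:

```python
# Sketch: invariants after wiring a common-stage image (hypothetical pre_push object).
common = generate_common_stage_image_object(pre_push, image_tag)
assert pre_push.to_push is False                            # only the common stage is pushed
assert pre_push.corresponding_common_stage_image is common  # set via the guarded setter
assert common.info["extra_build_args"]["PRE_PUSH_IMAGE"] == pre_push.ecr_url
assert common.additional_tags == [image_tag]                # final retag restores the plain tag
```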
+def generate_common_stage_image_object(pre_push_stage_image_object, image_tag):
+    """
+    Creates a common stage image object for a pre_push stage image. If for a pre_push stage image we create a common
+    stage image, then we do not push the pre_push stage image to the repository. Instead, we just push its common stage
+    image to the repository. Therefore, inside this function pre_push_stage_image_object is made NON-PUSHABLE.
+
+    :param pre_push_stage_image_object: DockerImage, an object of class DockerImage
+    :param image_tag: str, the original image tag, later applied to the common stage image as an additional tag
+    :return: CommonStageImage, an object of class CommonStageImage. CommonStageImage inherits DockerImage.
+    """
+    common_stage_info = deepcopy(pre_push_stage_image_object.info)
+    common_stage_info["extra_build_args"].update(
+        {"PRE_PUSH_IMAGE": pre_push_stage_image_object.ecr_url,}
+    )
+    common_stage_image_object = CommonStageImage(
+        info=common_stage_info,
+        dockerfile=os.path.join(os.sep, utils.get_root_folder_path(), "miscellaneous_dockerfiles", "Dockerfile.common"),
+        repository=pre_push_stage_image_object.repository,
+        tag=append_tag(image_tag, "multistage-common"),
+        to_build=pre_push_stage_image_object.to_build,
+        stage=constants.COMMON_STAGE,
+        additional_tags=[image_tag],
+    )
+    pre_push_stage_image_object.to_push = False
+    pre_push_stage_image_object.corresponding_common_stage_image = common_stage_image_object
+    return common_stage_image_object
+
+
+def show_build_info(images):
+    """
+    Displays the build info for a list of input images.
+
+    :param images: list[DockerImage]
+    """
+
+    if not os.path.isdir("logs"):
+        os.makedirs("logs")
+
+    for image in images:
+        image_description = f"{image.name}-{image.stage}"
+        FORMATTER.title(image_description)
+        FORMATTER.table(image.info.items())
+
+        flattened_logs = list(itertools.chain(*image.log))
+        with open(f"logs/{image_description}", "w") as fp:
+            fp.write("\n".join(flattened_logs))
+            image.summary["log"] = f"logs/{image_description}"
+        FORMATTER.table(image.summary.items())
+
+        FORMATTER.title(f"Ending Logs for {image_description}")
+        FORMATTER.print_lines(image.log[-1][-2:])
+
+
+def show_build_errors(images):
+    """
+    Iterates through each image to check if there is any image that has a failed status. In case
+    an image with a failed status is found, it raises an exception.
+
+    :param images: list[DockerImage]
+    """
+    is_any_build_failed = False
+    is_any_build_failed_size_limit = False
+
+    for image in images:
+        if image.build_status == constants.FAIL:
             FORMATTER.title(image.name)
-            FORMATTER.table(image.summary.items())
-
-    FORMATTER.title("Errors")
-    is_any_build_failed = False
-    is_any_build_failed_size_limit = False
-    for image in IMAGES:
-        if image.build_status == constants.FAIL:
-            FORMATTER.title(image.name)
-            FORMATTER.print_lines(image.log[-10:])
-            is_any_build_failed = True
-        else:
-            if image.build_status == constants.FAIL_IMAGE_SIZE_LIMIT:
-                is_any_build_failed_size_limit = True
-    if is_any_build_failed:
-        raise Exception("Build failed")
+            FORMATTER.print_lines(image.log[-1][-10:])
+            is_any_build_failed = True
         else:
-            if is_any_build_failed_size_limit:
-                FORMATTER.print("Build failed. Image size limit breached.")
+            if image.build_status == constants.FAIL_IMAGE_SIZE_LIMIT:
+                is_any_build_failed_size_limit = True
+    if is_any_build_failed:
+        raise Exception("Build failed")
+    else:
+        if is_any_build_failed_size_limit:
+            FORMATTER.print("Build failed. Image size limit breached.")
+        else:
+            FORMATTER.print("No errors")
+    return is_any_build_failed, is_any_build_failed_size_limit
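One subtlety in the helpers above: image.log is now a list of per-phase lists rather than a flat list, which is why show_build_info flattens it and the error path indexes log[-1]. Illustratively, with hypothetical entries:

```python
# Sketch: the nested log layout the new helpers assume (hypothetical entries).
import itertools

log = [
    ["Starting the Build Process for repo:x-pre-push", "Step 1/7 : FROM ..."],
    ["Starting image size check for repo:x-pre-push", "Image Size Check Succeeded for repo:x-pre-push"],
]
flattened = list(itertools.chain(*log))   # written to logs/<name>-<stage>
tail = "\n".join(log[-1][-10:])           # what get_tail_logs_in_pretty_format returns
```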
+
+
+def upload_metrics(images, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit):
+    """
+    Uploads Metrics for a list of images.
+
+    :param images: list[DockerImage]
+    :param BUILDSPEC: Buildspec
+    :param is_any_build_failed: bool
+    :param is_any_build_failed_size_limit: bool
+    """
+    metrics = Metrics(
+        context=constants.BUILD_CONTEXT, region=BUILDSPEC["region"], namespace=constants.METRICS_NAMESPACE,
+    )
+    for image in images:
+        try:
+            metrics.push_image_metrics(image)
+        except Exception as e:
+            if is_any_build_failed or is_any_build_failed_size_limit:
+                raise Exception(f"Build failed. {e}")
-    FORMATTER.title("Uploading Metrics")
-    metrics = Metrics(
-        context=constants.BUILD_CONTEXT,
-        region=BUILDSPEC["region"],
-        namespace=constants.METRICS_NAMESPACE,
-    )
-    for image in IMAGES:
-        try:
-            metrics.push_image_metrics(image)
-        except Exception as e:
-            if is_any_build_failed or is_any_build_failed_size_limit:
-                raise Exception(f"Build failed.{e}")
             else:
-                raise Exception(f"Build passed. {e}")
+                raise Exception(f"Build passed. {e}")
+
+    if is_any_build_failed_size_limit:
+        raise Exception("Build failed because of file limit")
-    if is_any_build_failed_size_limit:
-        raise Exception("Build failed because of file limit")
+    FORMATTER.print("Metrics Uploaded")
 
-    FORMATTER.separator()
-    # Set environment variables to be consumed by test jobs
-    test_trigger_job = utils.get_codebuild_project_name()
-    utils.set_test_env(
-        IMAGES,
-        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
-        TEST_TRIGGER=test_trigger_job,
-    )
+
+def build_images(images, make_dummy_boto_client=False):
+    """
+    Takes a list of images and executes their build process concurrently.
+
+    :param images: list[DockerImage]
+    :param make_dummy_boto_client: bool, specifies if a dummy client should be declared or not.
+
+    TODO: The parameter make_dummy_boto_client should be removed when get_dummy_boto_client method is removed.
+    """
+    THREADS = {}
+    # In the context of the ThreadPoolExecutor each instance of image.build submitted
+    # to it is executed concurrently in a separate thread.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        #### TODO: Remove this entire if block when get_dummy_boto_client is removed ####
+        if make_dummy_boto_client:
+            get_dummy_boto_client()
+        for image in images:
+            THREADS[image.name] = executor.submit(image.build)
+    # the FORMATTER.progress(THREADS) function call also waits until all threads have completed
+    FORMATTER.progress(THREADS)
+
+
+#### TODO: Remove this entire method when https://github.com/boto/boto3/issues/1592 is resolved ####
+def get_dummy_boto_client():
+    """
+    Makes a dummy boto3 client to ensure that boto3 clients behave in a thread safe manner.
+    In absence of this method, the behaviour documented in https://github.com/boto/boto3/issues/1592 is observed.
+    Once https://github.com/boto/boto3/issues/1592 is resolved, this method can be removed.
+
+    :return: BotocoreClientSTS
+    """
+    return boto3.client("sts", region_name=os.getenv("REGION"))
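get_dummy_boto_client works because boto3's default session is not thread-safe while it is first being populated (boto/boto3#1592): creating any client before the executor spawns workers forces that one-time initialization. The stricter pattern, for comparison, is one session per thread; a sketch with a hypothetical region:

```python
# Sketch: fully thread-safe boto3 usage — a Session per thread (hypothetical region).
import boto3

def make_sts_client():
    return boto3.session.Session().client("sts", region_name="us-west-2")
```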
+
+
+def push_images(images):
+    """
+    Takes a list of images and pushes them to ECR concurrently.
+
+    :param images: list[DockerImage]
+    """
+    THREADS = {}
+    with concurrent.futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKER_COUNT_FOR_PUSHING_IMAGES) as executor:
+        for image in images:
+            THREADS[image.name] = executor.submit(image.push_image)
+    FORMATTER.progress(THREADS)
+
+
+def retag_and_push_images(images):
+    """
+    Takes a list of images, retags them, and pushes them to the repository.
+
+    :param images: list[DockerImage]
+    """
+    THREADS = {}
+    with concurrent.futures.ThreadPoolExecutor(max_workers=constants.MAX_WORKER_COUNT_FOR_PUSHING_IMAGES) as executor:
+        for image in images:
+            THREADS[image.name] = executor.submit(image.push_image_with_additional_tags)
+    FORMATTER.progress(THREADS)
 
 def tag_image_with_pr_number(image_tag):
     pr_number = os.getenv("CODEBUILD_SOURCE_VERSION").replace("/", "-")
@@ -270,6 +434,16 @@ def tag_image_with_datetime(image_tag):
     return f"{image_tag}-{datetime_suffix}"
 
 
+def append_tag(image_tag, append_str):
+    """
+    Appends image_tag with append_str
+
+    :param image_tag: str, original image tag
+    :param append_str: str, string to be appended
+    """
+    return f"{image_tag}-{append_str}"
+
+
 def modify_repository_name_for_context(image_repo_uri, build_context):
     repo_uri_values = image_repo_uri.split("/")
     repo_name = repo_uri_values[-1]
diff --git a/src/metrics.py b/src/metrics.py
index c9141c2ecd0d..3ac7b8a0d57d 100644
--- a/src/metrics.py
+++ b/src/metrics.py
@@ -40,6 +40,7 @@ def push_image_metrics(self, image):
             "device_type": image.device_type,
             "python_version": image.python_version,
             "image_type": image.image_type,
+            "image_stage": image.stage,
         }
         if image.build_status == constants.NOT_BUILT:
             return None
diff --git a/src/safety_report_generator.py b/src/safety_report_generator.py
new file mode 100644
index 000000000000..b52c2804fd13
--- /dev/null
+++ b/src/safety_report_generator.py
@@ -0,0 +1,166 @@
+from invoke.context import Context
+from datetime import datetime
+
+import json
+import os
+
+
+class SafetyReportGenerator:
+    """
+    The SafetyReportGenerator class deals with the functionality of generating safety reports for running containers.
+    The safety report takes the following format:
+    [
+        {
+            "package": "package",
+            "scan_status": "SUCCEEDED/FAILED/IGNORED",
+            "installed": "version",
+            "vulnerabilities": [
+                {
+                    "vulnerability_id": "safety_vulnerability_id",
+                    "advisory": "description of the issue",
+                    "reason_to_ignore": "reason to ignore the vulnerability_id",
+                    "spec": "version_spec"
+                },
+                ...
+            ]
+            "date":
+        }
+        ...
+    ]
+    """
+
+    def __init__(self, container_id, ignore_dict={}):
+        self.container_id = container_id
+        self.vulnerability_dict = {}
+        self.vulnerability_list = []
+        self.ignore_dict = ignore_dict
+        self.ignored_vulnerability_count = {}
+        self.ctx = Context()
+        self.docker_exec_cmd = f"docker exec -i {container_id}"
+        self.safety_check_output = None
+
+    def insert_vulnerabilities_into_report(self, scanned_vulnerabilities):
+        """
+        Takes the list of vulnerabilities produced by the safety scan as input and iterates through the list to insert
+        the vulnerabilities into the vulnerability_dict.
+
+        :param scanned_vulnerabilities: list[list], consists of a list of vulnerabilities. Each vulnerability is a list itself.
+ """ + for vulnerability in scanned_vulnerabilities: + package, spec, installed, advisory, vulnerability_id = vulnerability[:5] + vulnerability_details = { + "vulnerability_id": vulnerability_id, + "advisory": advisory, + "spec": spec, + "reason_to_ignore": "N/A", + } + + if package not in self.ignored_vulnerability_count: + self.ignored_vulnerability_count[package] = 0 + + if vulnerability_id in self.ignore_dict: + vulnerability_details["reason_to_ignore"] = self.ignore_dict[vulnerability_id] + self.ignored_vulnerability_count[package] += 1 + + if package not in self.vulnerability_dict: + self.vulnerability_dict[package] = { + "package": package, + "scan_status": "TBD", + "installed": installed, + "vulnerabilities": [vulnerability_details], + "date": self.timestamp, + } + else: + self.vulnerability_dict[package]["vulnerabilities"].append(vulnerability_details) + + def get_package_set_from_container(self): + """ + Extracts package set of a container. + + :return: list[dict], each dict is structured like {'key': package_name, 'version':package_version} + """ + python_cmd_to_extract_package_set = """ python -c "import pkg_resources; \ + import json; \ + print(json.dumps([{'key':d.key, 'version':d.version} for d in pkg_resources.working_set]))" """ + + run_output = self.ctx.run(f"{self.docker_exec_cmd} {python_cmd_to_extract_package_set}", hide=True, warn=True) + if run_output.exited != 0: + raise Exception("Package set cannot be retrieved from the container.") + + return json.loads(run_output.stdout) + + def insert_safe_packages_into_report(self, packages): + """ + Takes the list of all the packages existing in a container and inserts safe packages into the + vulnerability_dict. + + :param packages: list[dict], each dict looks like {"key":package_name, "version":package_version} + """ + for pkg in packages: + if pkg["key"] not in self.vulnerability_dict: + self.vulnerability_dict[pkg["key"]] = { + "package": pkg["key"], + "scan_status": "SUCCEEDED", + "installed": pkg["version"], + "vulnerabilities": [ + {"vulnerability_id": "N/A", "advisory": "N/A", "reason_to_ignore": "N/A", "spec": "N/A"} + ], + "date": self.timestamp, + } + + def process_report(self): + """ + Once all the packages (safe and unsafe both) have been inserted in the vulnerability_dict, this method is called. + On being called, it processes each package within the vulnerability_dict and appends it to the vulnerability_list. + Before appending it checks if the scan_status is "TBD". If yes, it assigns the correct scan_status to the package. + """ + for (package, package_scan_results) in self.vulnerability_dict.items(): + if package_scan_results["scan_status"] == "TBD": + if len(package_scan_results["vulnerabilities"]) == self.ignored_vulnerability_count[package]: + package_scan_results["scan_status"] = "IGNORED" + else: + package_scan_results["scan_status"] = "FAILED" + self.vulnerability_list.append(package_scan_results) + + def run_safety_check_in_non_cb_context(self): + """ + Runs the safety check on the container in Non-CodeBuild Context + + :return: string, A JSON formatted string containing vulnerabilities found in the container + """ + safety_check_command = f"{self.docker_exec_cmd} safety check --json" + run_out = self.ctx.run(safety_check_command, warn=True, hide=True) + if run_out.return_code != 0: + print("safety check command returned non-zero error code. 
This indicates that vulnerabilities might exist.") + return run_out.stdout + + def run_safety_check_in_cb_context(self): + """ + Runs the safety check on the container in CodeBuild Context + + :return: string, A JSON formatted string containing vulnerabilities found in the container + """ + from dlc.safety_check import SafetyCheck + + return SafetyCheck().run_safety_check_on_container(self.docker_exec_cmd) + + def generate(self): + """ + Acts as a driver function for this class that initiates the entire process of running safety check and returing + the vulnerability_list + + :return: list[dict], the output follows the same format as mentioned in the description of the class + """ + self.timestamp = datetime.now().strftime("%d-%m-%Y") + if os.getenv("IS_CODEBUILD_IMAGE") is None: + self.safety_check_output = self.run_safety_check_in_non_cb_context() + elif os.getenv("IS_CODEBUILD_IMAGE").upper() == "TRUE": + self.safety_check_output = self.run_safety_check_in_cb_context() + # In case of errors, json.loads command will fail. We want the failure to occur to ensure that + # build process fails in case the safety report cannot be generated properly. + scanned_vulnerabilities = json.loads(self.safety_check_output) + self.insert_vulnerabilites_into_report(scanned_vulnerabilities) + packages = self.get_package_set_from_container() + self.insert_safe_packages_into_report(packages) + self.process_report() + return self.vulnerability_list diff --git a/src/utils.py b/src/utils.py index e6dfaacaf43a..5adbbad2d595 100644 --- a/src/utils.py +++ b/src/utils.py @@ -23,6 +23,7 @@ from config import is_build_enabled from invoke.context import Context from botocore.exceptions import ClientError +from safety_report_generator import SafetyReportGenerator LOGGER = logging.getLogger(__name__) LOGGER.setLevel(logging.DEBUG) @@ -403,7 +404,7 @@ def build_setup(framework, device_types=None, image_types=None, py_versions=None os.environ[env_variable] = "true" -def fetch_dlc_images_for_test_jobs(images): +def fetch_dlc_images_for_test_jobs(images, use_latest_additional_tag=False): """ use the JobParamters.run_test_types values to pass on image ecr urls to each test type. 
     :param images: list
@@ -418,8 +419,12 @@ def fetch_dlc_images_for_test_jobs(images):
             continue
         use_preexisting_images = (build_disabled and docker_image.build_status == constants.NOT_BUILT)
         if docker_image.build_status == constants.SUCCESS or use_preexisting_images:
+            ecr_url_to_test = docker_image.ecr_url
+            if use_latest_additional_tag and len(docker_image.additional_tags) > 0:
+                ecr_url_to_test = f"{docker_image.repository}:{docker_image.additional_tags[-1]}"
+
             # Run sanity tests on all the images built
-            DLC_IMAGES["sanity"].append(docker_image.ecr_url)
+            DLC_IMAGES["sanity"].append(ecr_url_to_test)
             image_job_type = docker_image.info.get("image_type")
             image_device_type = docker_image.info.get("device_type")
             image_python_version = docker_image.info.get("python_version")
@@ -431,12 +436,12 @@ def fetch_dlc_images_for_test_jobs(images):
                 constants.ALL_TESTS if constants.ALL in run_tests else run_tests
             )
             for test in run_tests:
-                DLC_IMAGES[test].append(docker_image.ecr_url)
+                DLC_IMAGES[test].append(ecr_url_to_test)
             # when key is training or inference values can be (ecs, eks, ec2, sagemaker)
             if image_job_type in JobParameters.image_run_test_types.keys():
                 run_tests = JobParameters.image_run_test_types.get(image_job_type)
                 for test in run_tests:
-                    DLC_IMAGES[test].append(docker_image.ecr_url)
+                    DLC_IMAGES[test].append(ecr_url_to_test)
             # when key is image_tag (training-cpu-py3) values can be (ecs, eks, ec2, sagemaker)
             if image_tag in JobParameters.image_run_test_types.keys():
                 run_tests = JobParameters.image_run_test_types.get(image_tag)
@@ -444,7 +449,7 @@ def fetch_dlc_images_for_test_jobs(images):
                     constants.ALL_TESTS if constants.ALL in run_tests else run_tests
                 )
                 for test in run_tests:
-                    DLC_IMAGES[test].append(docker_image.ecr_url)
+                    DLC_IMAGES[test].append(ecr_url_to_test)
 
     for test_type in DLC_IMAGES.keys():
         test_images = DLC_IMAGES[test_type]
@@ -458,7 +463,7 @@ def write_to_json_file(file_name, content):
         json.dump(content, fp)
 
 
-def set_test_env(images, images_env="DLC_IMAGES", **kwargs):
+def set_test_env(images, use_latest_additional_tag=False, images_env="DLC_IMAGES", **kwargs):
     """
     Util function to write a file to be consumed by test env with necessary environment variables
 
@@ -471,7 +476,7 @@ def set_test_env(images, images_env="DLC_IMAGES", **kwargs):
     """
     test_envs = []
 
-    test_images_dict = fetch_dlc_images_for_test_jobs(images)
+    test_images_dict = fetch_dlc_images_for_test_jobs(images, use_latest_additional_tag=use_latest_additional_tag)
 
     # dumping the test_images to dict that can be used in src/start_testbuilds.py
    write_to_json_file(constants.TEST_TYPE_IMAGES_PATH, test_images_dict)
@@ -488,3 +493,63 @@ def get_codebuild_project_name():
     # Default value for codebuild project name is "local_test" when run outside of CodeBuild
     return os.getenv("CODEBUILD_BUILD_ID", "local_test").split(":")[0]
+
+
+def get_root_folder_path():
+    """
+    Extract the root folder path for the repository.
+
+    :return: str
+    """
+    root_dir_pattern = re.compile(r"^(\S+deep-learning-containers)")
+    pwd = os.getcwd()
+    return os.getenv("CODEBUILD_SRC_DIR", root_dir_pattern.match(pwd).group(1))
+
+
+def get_safety_ignore_dict(image_uri, framework, python_version, job_type):
+    """
+    Get a dict of known safety check issue IDs to ignore, if specified in file ../data/ignore_ids_safety_scan.json.
+
+    :param image_uri: str, consists of f"{image_repo}:{image_tag}"
+    :param framework: str, framework like tensorflow, mxnet etc.
+    :param python_version: str, py2 or py3
+    :param job_type: str, type of job. Can be "training"/"inference"
+    :return: dict, key is the ignored vulnerability id and value is the reason to ignore it
+    """
+    if job_type == "inference":
+        job_type = (
+            "inference-eia" if "eia" in image_uri else "inference-neuron" if "neuron" in image_uri else "inference"
+        )
+
+    ignore_safety_ids = {}
+    ignore_data_file = os.path.join(os.sep, get_root_folder_path(), "data", "ignore_ids_safety_scan.json")
+    with open(ignore_data_file) as f:
+        ignore_safety_ids = json.load(f)
+
+    return ignore_safety_ids.get(framework, {}).get(job_type, {}).get(python_version, {})
+
+
+def generate_safety_report_for_image(image_uri, image_info, storage_file_path=None):
+    """
+    Generate a safety scan report for an image and store it at the location specified.
+
+    :param image_uri: str, consists of f"{image_repo}:{image_tag}"
+    :param image_info: dict, should consist of 3 keys - "framework", "python_version" and "image_type".
+    :param storage_file_path: str, looks like "storage_location.json"
+    :return: list[dict], safety report generated by SafetyReportGenerator
+    """
+    ctx = Context()
+    docker_run_cmd = f"docker run -id --entrypoint='/bin/bash' {image_uri} "
+    container_id = ctx.run(f"{docker_run_cmd}", hide=True, warn=True).stdout.strip()
+    install_safety_cmd = "pip install safety"
+    docker_exec_cmd = f"docker exec -i {container_id}"
+    ctx.run(f"{docker_exec_cmd} {install_safety_cmd}", hide=True, warn=True)
+    ignore_dict = get_safety_ignore_dict(
+        image_uri, image_info["framework"], image_info["python_version"], image_info["image_type"]
+    )
+    safety_scan_output = SafetyReportGenerator(container_id, ignore_dict=ignore_dict).generate()
+    ctx.run(f"docker rm -f {container_id}", hide=True, warn=True)
+    if storage_file_path:
+        with open(storage_file_path, "w", encoding="utf-8") as f:
+            json.dump(safety_scan_output, f, indent=4)
+    return safety_scan_output
diff --git a/test/dlc_tests/sanity/test_safety_report_file.py b/test/dlc_tests/sanity/test_safety_report_file.py
new file mode 100644
index 000000000000..ed76ab10b25c
--- /dev/null
+++ b/test/dlc_tests/sanity/test_safety_report_file.py
@@ -0,0 +1,102 @@
+import json
+import logging
+import sys
+
+import pytest
+
+from invoke import run
+from dataclasses import dataclass
+from typing import List
+
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.INFO)
+LOGGER.addHandler(logging.StreamHandler(sys.stderr))
+
+SAFETY_FILE = "/opt/aws/dlc/info/safety_report.json"
+
+
+@dataclass
+class SafetyVulnerabilityAdvisory:
+    """
+    One of the dataclasses for parsing the safety report
+    """
+
+    vulnerability_id: str
+    advisory: str
+    reason_to_ignore: str
+    spec: str
+
+
+@dataclass
+class SafetyPackageVulnerabilityReport:
+    """
+    One of the dataclasses for parsing the safety report
+    """
+
+    package: str
+    scan_status: str
+    installed: str
+    vulnerabilities: List[SafetyVulnerabilityAdvisory]
+    date: str
+
+    def __post_init__(self):
+        self.vulnerabilities = [SafetyVulnerabilityAdvisory(**i) for i in self.vulnerabilities]
+
+
+@dataclass
+class SafetyPythonEnvironmentVulnerabilityReport:
+    """
+    One of the dataclasses for parsing the safety report
+    """
+
+    report: List[SafetyPackageVulnerabilityReport]
+
+    def __post_init__(self):
+        self.report = [SafetyPackageVulnerabilityReport(**i) for i in self.report]
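These dataclasses double as a schema check: constructing them from the parsed JSON raises TypeError on unexpected or missing keys. A quick sketch with a minimal, hypothetical report:

```python
# Sketch: parsing a minimal, hypothetical report through the dataclasses above.
raw = [{
    "package": "foo", "scan_status": "SUCCEEDED", "installed": "1.0",
    "vulnerabilities": [{"vulnerability_id": "N/A", "advisory": "N/A",
                         "reason_to_ignore": "N/A", "spec": "N/A"}],
    "date": "27-08-2021",
}]
report = SafetyPythonEnvironmentVulnerabilityReport(report=raw)
assert report.report[0].vulnerabilities[0].spec == "N/A"
```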
+
+@pytest.mark.model("N/A")
+@pytest.mark.skip(reason="Will be unskipped when Safety Scan Report Generation is enabled")
+def test_safety_file_exists_and_is_valid(image):
+    """
+    Checks if the image has a safety report at the desired location and fails if any of the
+    packages in the report have failed the safety check.
+
+    :param image: str, image uri
+    """
+    repo_name, image_tag = image.split("/")[-1].split(":")
+    container_name = f"{repo_name}-{image_tag}-safety"
+    # Override the entrypoint with bash so the container stays idle instead of starting a server
+    run(f"docker run -id " f"--name {container_name} " f"--entrypoint='/bin/bash' " f"{image}", hide=True, warn=True)
+
+    try:
+        # Check if the file exists
+        docker_exec_cmd = f"docker exec -i {container_name}"
+        safety_file_check = run(f"{docker_exec_cmd} test -f {SAFETY_FILE}", warn=True, hide=True)
+        assert safety_file_check.ok, f"Safety file existence test failed for {image}"
+
+        file_content = run(f"{docker_exec_cmd} cat {SAFETY_FILE}", warn=True, hide=True)
+        raw_scan_result = json.loads(file_content.stdout)
+        safety_report_object = SafetyPythonEnvironmentVulnerabilityReport(report=raw_scan_result)
+
+        # Processing the safety report
+        report_log_template = (
+            "SAFETY_REPORT ({status}) [pkg: {pkg}] [installed: {installed}] [vulnerabilities: {vulnerabilities}]"
+        )
+        failed_count = 0
+        for report_item in safety_report_object.report:
+            if report_item.scan_status == "FAILED":
+                failed_count += 1
+                LOGGER.error(
+                    report_log_template.format(
+                        status="FAILED",
+                        pkg=report_item.package,
+                        installed=report_item.installed,
+                        vulnerabilities=report_item.vulnerabilities,
+                    )
+                )
+        assert failed_count == 0, f"{failed_count} package(s) failed the safety test for {image}"
+        LOGGER.info(f"Safety report file validation completed successfully; report exists at {SAFETY_FILE}")
+    finally:
+        run(f"docker rm -f {container_name}", hide=True, warn=True)
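Taken together, the pieces in this PR can be exercised from Python in a few lines; a hedged end-to-end sketch using generate_safety_report_for_image from src/utils.py above (image URI and metadata hypothetical):

```python
# Sketch: generating a safety report for one image (hypothetical URI and metadata).
from utils import generate_safety_report_for_image

image_info = {"framework": "tensorflow", "python_version": "py3", "image_type": "training"}
report = generate_safety_report_for_image(
    "myregistry/tensorflow-training:2.6.0-pre-push",     # hypothetical pre-push URI
    image_info,
    storage_file_path="tf_training_safety_report.json",  # the file Dockerfile.common COPYs
)
print(sum(item["scan_status"] == "FAILED" for item in report), "packages failed")
```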