-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
test: pipeline to check vulnerabilities for KFP images #5066
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Submit the build described in vulnz_check.yaml to Cloud Build, running in
# the ml-pipeline-test project.
.PHONY: vulnz-check
vulnz-check:
	gcloud builds submit --config vulnz_check.yaml --project ml-pipeline-test
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Vulnerability Check Pipeline | ||
|
||
Background on the process we want and the options we considered: [#3857](https://github.com/kubeflow/pipelines/issues/3857). | ||
|
||
The pipeline uses Kritis Signer to check vulnerabilities using Google Cloud vulnerability scanning. | ||
|
||
For reference, [we can use Kritis Signer to check vulnerabilities using a policy](https://cloud.google.com/binary-authorization/docs/creating-attestations-kritis#check-only). | ||
|
||
There are two pipelines in this folder: | ||
|
||
* The `mirror_images.py` pipeline is a helper that mirrors images into your own GCR registry, because Google Cloud vulnerability scanning only works on images in a registry you own. | ||
* The `vulnz_check.py` pipeline checks images for vulnerabilities against a predefined policy and allowlist. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# %% | ||
from typing import NamedTuple | ||
import os | ||
|
||
import kfp | ||
from kfp import dsl | ||
from kfp.components import func_to_container_op, InputPath, OutputPath | ||
|
||
# %% | ||
# Mirror Image | ||
|
||
|
||
def mirror_image(
    image: str,
    source_registry: str,
    destination_registry: str,
    tag: str = '',
):
    """Mirror one image tag from a source registry to a destination registry.

    Runs ``gcloud container images add-tag`` so that
    ``<source_registry>/<image>:<tag>`` is also tagged as
    ``<destination_registry>/<image>:<tag>``.

    Args:
        image: Image name, without registry prefix or tag.
        source_registry: Registry path the image is read from.
        destination_registry: Registry path the image is tagged into.
        tag: Tag used for both the source and the destination reference.

    Raises:
        subprocess.CalledProcessError: If the gcloud command exits non-zero.
    """
    # NOTE(review): the component should probably output destination_image,
    # so that it can be chained with downstream steps.

    # Imported inside the function because the function body is what gets
    # shipped as the component program.
    import subprocess

    source_image = '{}/{}:{}'.format(source_registry, image, tag)
    destination_image = '{}/{}:{}'.format(destination_registry, image, tag)
    # check=True turns a failed gcloud invocation into an exception, so the
    # pipeline step fails instead of silently reporting success.
    subprocess.run(
        [
            'gcloud', 'container', 'images', 'add-tag', source_image,
            destination_image
        ],
        check=True,
    )
|
||
|
||
# Wrap mirror_image as a KFP component. The Cloud SDK base image is used
# because the function shells out to the gcloud CLI.
mirror_image_component = kfp.components.create_component_from_func(
    func=mirror_image,
    base_image='google/cloud-sdk:alpine',
)
|
||
|
||
# Pipeline definition: fan out over the list of KFP image names and mirror
# each one from source_registry to destination_registry at the given version.
@dsl.pipeline(
    name='Mirror Images',
    description=(
        'A pipeline that mirrors gcr images from one repository to another.'
    ),
)
def mirror_images_pipeline(
    version: str = '1.3.0',
    source_registry: str = 'gcr.io/ml-pipeline',
    destination_registry: str = 'gcr.io/gongyuan-pipeline-test/dev'
):
    kfp_image_names = [
        'persistenceagent',
        'scheduledworkflow',
        'frontend',
        'viewer-crd-controller',
        'visualization-server',
        'inverse-proxy-agent',
        'metadata-writer',
        'cache-server',
        'cache-deployer',
        'metadata-envoy',
    ]
    # One mirror_image step per image name; the pipeline version doubles as
    # the image tag.
    with dsl.ParallelFor(kfp_image_names) as image_name:
        mirror_image_component(
            image=image_name,
            source_registry=source_registry,
            destination_registry=destination_registry,
            tag=version,
        )
|
||
|
||
# %% | ||
if __name__ == '__main__':
    # The KFP API endpoint is read from the KFP_HOST environment variable.
    kfp_host = os.getenv('KFP_HOST')
    if kfp_host is None:
        # SystemExit with a message prints it to stderr and exits with
        # status 1. The `exit` builtin is injected by the `site` module and
        # is not guaranteed to exist in every interpreter configuration.
        raise SystemExit('KFP_HOST env var is not set')
    # Submit the pipeline for execution:
    kfp.Client(host=kfp_host).create_run_from_pipeline_func(
        mirror_images_pipeline, arguments={'version': '1.3.0'}
    )

    # Compiling the pipeline
    # kfp.compiler.Compiler().compile(mirror_images_pipeline, __file__ + '.yaml')
|
||
# %% |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
apiVersion: argoproj.io/v1alpha1 | ||
kind: Workflow | ||
metadata: | ||
generateName: mirror-images- | ||
annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.3.0, pipelines.kubeflow.org/pipeline_compilation_time: '2021-02-02T16:03:40.325068', | ||
pipelines.kubeflow.org/pipeline_spec: '{"description": "A pipeline that mirrors | ||
gcr images from one repository to another.", "inputs": [{"default": "1.3.0", | ||
"name": "version", "optional": true, "type": "String"}, {"default": "gcr.io/ml-pipeline", | ||
"name": "source_registry", "optional": true, "type": "String"}, {"default": | ||
"gcr.io/gongyuan-pipeline-test/dev", "name": "destination_registry", "optional": | ||
true, "type": "String"}], "name": "Mirror Images"}'} | ||
labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.3.0} | ||
spec: | ||
entrypoint: mirror-images | ||
templates: | ||
- name: for-loop-for-loop-9af0a19b-1 | ||
inputs: | ||
parameters: | ||
- {name: destination_registry} | ||
- {name: loop-item-param-9af0a19b} | ||
- {name: source_registry} | ||
- {name: version} | ||
dag: | ||
tasks: | ||
- name: mirror-image | ||
template: mirror-image | ||
arguments: | ||
parameters: | ||
- {name: destination_registry, value: '{{inputs.parameters.destination_registry}}'} | ||
- {name: loop-item-param-9af0a19b, value: '{{inputs.parameters.loop-item-param-9af0a19b}}'} | ||
- {name: source_registry, value: '{{inputs.parameters.source_registry}}'} | ||
- {name: version, value: '{{inputs.parameters.version}}'} | ||
- name: mirror-image | ||
container: | ||
args: [--image, '{{inputs.parameters.loop-item-param-9af0a19b}}', --source-registry, | ||
'{{inputs.parameters.source_registry}}', --destination-registry, '{{inputs.parameters.destination_registry}}', | ||
--tag, '{{inputs.parameters.version}}'] | ||
command: | ||
- sh | ||
- -ec | ||
- | | ||
program_path=$(mktemp) | ||
echo -n "$0" > "$program_path" | ||
python3 -u "$program_path" "$@" | ||
- | | ||
def mirror_image( | ||
image, | ||
source_registry, | ||
destination_registry, | ||
tag = '', | ||
): | ||
source_image = '{}/{}:{}'.format(source_registry, image, tag) | ||
destination_image = '{}/{}:{}'.format(destination_registry, image, tag) | ||
import subprocess | ||
subprocess.run([ | ||
'gcloud', 'container', 'images', 'add-tag', source_image, | ||
destination_image | ||
]) | ||
|
||
import argparse | ||
_parser = argparse.ArgumentParser(prog='Mirror image', description='') | ||
_parser.add_argument("--image", dest="image", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--source-registry", dest="source_registry", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--destination-registry", dest="destination_registry", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--tag", dest="tag", type=str, required=False, default=argparse.SUPPRESS) | ||
_parsed_args = vars(_parser.parse_args()) | ||
|
||
_outputs = mirror_image(**_parsed_args) | ||
image: google/cloud-sdk:alpine | ||
inputs: | ||
parameters: | ||
- {name: destination_registry} | ||
- {name: loop-item-param-9af0a19b} | ||
- {name: source_registry} | ||
- {name: version} | ||
metadata: | ||
annotations: {pipelines.kubeflow.org/component_spec: '{"implementation": {"container": | ||
{"args": ["--image", {"inputValue": "image"}, "--source-registry", {"inputValue": | ||
"source_registry"}, "--destination-registry", {"inputValue": "destination_registry"}, | ||
{"if": {"cond": {"isPresent": "tag"}, "then": ["--tag", {"inputValue": "tag"}]}}], | ||
"command": ["sh", "-ec", "program_path=$(mktemp)\necho -n \"$0\" > \"$program_path\"\npython3 | ||
-u \"$program_path\" \"$@\"\n", "def mirror_image(\n image,\n source_registry,\n destination_registry,\n tag | ||
= '''',\n):\n source_image = ''{}/{}:{}''.format(source_registry, image, | ||
tag)\n destination_image = ''{}/{}:{}''.format(destination_registry, | ||
image, tag)\n import subprocess\n subprocess.run([\n ''gcloud'', | ||
''container'', ''images'', ''add-tag'', source_image,\n destination_image\n ])\n\nimport | ||
argparse\n_parser = argparse.ArgumentParser(prog=''Mirror image'', description='''')\n_parser.add_argument(\"--image\", | ||
dest=\"image\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--source-registry\", | ||
dest=\"source_registry\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--destination-registry\", | ||
dest=\"destination_registry\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--tag\", | ||
dest=\"tag\", type=str, required=False, default=argparse.SUPPRESS)\n_parsed_args | ||
= vars(_parser.parse_args())\n\n_outputs = mirror_image(**_parsed_args)\n"], | ||
"image": "google/cloud-sdk:alpine"}}, "inputs": [{"name": "image", "type": | ||
"String"}, {"name": "source_registry", "type": "String"}, {"name": "destination_registry", | ||
"type": "String"}, {"default": "", "name": "tag", "optional": true, "type": | ||
"String"}], "name": "Mirror image"}', pipelines.kubeflow.org/component_ref: '{}', | ||
pipelines.kubeflow.org/arguments.parameters: '{"destination_registry": "{{inputs.parameters.destination_registry}}", | ||
"image": "{{inputs.parameters.loop-item-param-9af0a19b}}", "source_registry": | ||
"{{inputs.parameters.source_registry}}", "tag": "{{inputs.parameters.version}}"}'} | ||
- name: mirror-images | ||
inputs: | ||
parameters: | ||
- {name: destination_registry} | ||
- {name: source_registry} | ||
- {name: version} | ||
dag: | ||
tasks: | ||
- name: for-loop-for-loop-9af0a19b-1 | ||
template: for-loop-for-loop-9af0a19b-1 | ||
arguments: | ||
parameters: | ||
- {name: destination_registry, value: '{{inputs.parameters.destination_registry}}'} | ||
- {name: loop-item-param-9af0a19b, value: '{{item}}'} | ||
- {name: source_registry, value: '{{inputs.parameters.source_registry}}'} | ||
- {name: version, value: '{{inputs.parameters.version}}'} | ||
withItems: [persistenceagent, scheduledworkflow, frontend, viewer-crd-controller, | ||
visualization-server, inverse-proxy-agent, metadata-writer, cache-server, | ||
cache-deployer, metadata-envoy] | ||
arguments: | ||
parameters: | ||
- {name: version, value: 1.3.0} | ||
- {name: source_registry, value: gcr.io/ml-pipeline} | ||
- {name: destination_registry, value: gcr.io/gongyuan-pipeline-test/dev} | ||
serviceAccountName: pipeline-runner |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# %% | ||
from typing import NamedTuple, List | ||
import os | ||
|
||
import kfp | ||
from kfp import dsl | ||
from kfp.components import func_to_container_op, InputPath, OutputPath | ||
|
||
# %% | ||
# Vulnerability checking | ||
|
||
|
||
def fetch_image_digest(image: str) -> str:
    """ Fetch digest of an image.

    Args:
        image (str): image url

    Returns:
        str: full image url with sha256 digest, without surrounding
            whitespace

    Raises:
        subprocess.CalledProcessError: if the gcloud command exits non-zero
    """
    # Imported inside the function because the function body is what gets
    # packaged as the component program.
    import subprocess

    raw_output = subprocess.check_output([
        'gcloud', 'container', 'images', 'describe', image,
        "--format=value(image_summary.fully_qualified_digest)"
    ])
    # Decode with a fixed codec: sys.stdout.encoding can be None when stdout
    # is not a regular text stream, which would make decode() fail.
    # Strip the trailing newline the CLI prints so that downstream steps
    # receive a clean image reference.
    return raw_output.decode('utf-8').strip()
|
||
|
||
# Wrap fetch_image_digest as a KFP component. The Cloud SDK base image is
# used because the function shells out to the gcloud CLI.
fetch_image_digest_component = kfp.components.create_component_from_func(
    func=fetch_image_digest,
    base_image='google/cloud-sdk:alpine',
)
|
||
# Component that runs Kritis Signer in check-only mode against one image.
# The embedded script writes a VulnzSigningPolicy to policy.yaml, runs
# /kritis/signer on the image passed as the first argument ($0), and tees the
# combined stdout/stderr into the "Vulnerability Report" output path ($1).
# `set -o pipefail` keeps the signer's exit status from being masked by tee.
# NOTE(review): in the future this component YAML could live under
# /components/.
kritis_check = kfp.components.load_component_from_text(
    '''
name: Kritis Check
inputs:
- {name: Image, type: String}
outputs:
- {name: Vulnerability Report, type: String}
metadata:
  annotations:
    author: Yuan Gong <gongyuan94@gmail.com>
implementation:
  container:
    image: gcr.io/gongyuan-pipeline-test/kritis-signer
    command:
    - /bin/bash
    - -exc
    - |
      set -o pipefail

      export PATH=/bin # this image does not have PATH defined
      mkdir -p "$(dirname "$1")";
      mkdir -p /workspace
      cd /workspace
      cat >policy.yaml <<EOF
      apiVersion: kritis.grafeas.io/v1beta1
      kind: VulnzSigningPolicy
      metadata:
        name: kfp-vsp
      spec:
        imageVulnerabilityRequirements:
          maximumFixableSeverity: MEDIUM
          maximumUnfixableSeverity: HIGH
          allowlistCVEs:
          - projects/goog-vulnz/notes/CVE-2019-19814
      EOF
      cat policy.yaml

      /kritis/signer \
      -v=10 \
      -logtostderr \
      -image="$0" \
      -policy="policy.yaml" \
      -mode=check-only \
      |& tee "$1"

    - inputValue: Image
    - outputPath: Vulnerability Report
'''
)
|
||
|
||
# Component that returns, as a JSON-encoded list, the full image references
# ('<registry_url>/<image>:<version>') for the fixed set of KFP images below.
@func_to_container_op
def kfp_images(version: str, registry_url: str) -> str:
    import json
    image_names = [
        'persistenceagent',
        'scheduledworkflow',
        'frontend',
        'viewer-crd-controller',
        'visualization-server',
        'inverse-proxy-agent',
        'metadata-writer',
        'cache-server',
        'cache-deployer',
        'metadata-envoy',
    ]
    image_urls = []
    for name in image_names:
        image_urls.append('{}/{}:{}'.format(registry_url, name, version))
    return json.dumps(image_urls)
|
||
|
||
# Pipeline definition: build the list of KFP image references, then for each
# one resolve its digest and run the Kritis check on the digest reference.
@dsl.pipeline(
    name='Vulnerability Checking',
    description=(
        'A pipeline to check vulnerability for all Kubeflow Pipelines images'
    ),
)
def vulnz_check_pipeline(
    version: str = '1.3.0',
    registry_url: str = 'gcr.io/gongyuan-pipeline-test/dev'
):
    image_list_op = kfp_images(version=version, registry_url=registry_url)
    # Fan out over the JSON list produced by kfp_images.
    with dsl.ParallelFor(image_list_op.output) as image_url:
        digest_op = fetch_image_digest_component(image=image_url)
        kritis_check(image=digest_op.output)
|
||
|
||
# %% | ||
if __name__ == '__main__':
    # The KFP API endpoint is read from the KFP_HOST environment variable.
    kfp_host = os.getenv('KFP_HOST')
    if kfp_host is None:
        # SystemExit with a message prints it to stderr and exits with
        # status 1. The `exit` builtin is injected by the `site` module and
        # is not guaranteed to exist in every interpreter configuration.
        raise SystemExit('KFP_HOST env var is not set')
    client = kfp.Client(host=kfp_host)
    # Submit the pipeline for execution:
    run = client.create_run_from_pipeline_func(
        vulnz_check_pipeline, arguments={'version': '1.3.0'}
    )
    print('Run details:')
    print('{}/#/runs/details/{}'.format(kfp_host, run.run_id))
    # Wait up to five minutes for the run to finish, then report its status.
    timeout_in_seconds = 5 * 60
    run_result = client.wait_for_run_completion(run.run_id, timeout_in_seconds)
    print(run_result.run.status)  # Failed or ...

    # Compiling the pipeline
    # kfp.compiler.Compiler().compile(vulnz_check_pipeline, __file__ + '.yaml')
|
||
# %% |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this component is easier to define in YAML form, since it just calls a command-line program.