Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubernetes support for Metaflow #644

Merged
merged 35 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
cf31c3f
Refactor @resources decorator
savingoyal Jul 17, 2021
c109bbb
Update __init__.py
savingoyal Jul 17, 2021
d93b980
Refactor @batch decorator
savingoyal Jul 17, 2021
89aa5d4
more change
savingoyal Jul 17, 2021
68123d3
more changes
savingoyal Jul 17, 2021
4e6becc
more changes
savingoyal Jul 17, 2021
f60b18a
@kubernetes
savingoyal Aug 11, 2021
ff12dfe
Kubernetes
savingoyal Aug 13, 2021
2590cea
More changes
savingoyal Aug 19, 2021
a04608a
More changes
savingoyal Aug 21, 2021
18ea0b8
more changes
savingoyal Aug 24, 2021
38baa27
some more changes
savingoyal Aug 26, 2021
fc0bac4
more changes
savingoyal Aug 26, 2021
e54f79f
add disk space
savingoyal Aug 26, 2021
433c1eb
Add todos
savingoyal Aug 26, 2021
1984872
some fixes
savingoyal Aug 27, 2021
038d9d5
add k8s testing context
savingoyal Aug 27, 2021
6d0fb91
more changes
savingoyal Aug 27, 2021
e615fcc
some more changes
savingoyal Aug 28, 2021
f8a4c4e
minor fixups
savingoyal Aug 28, 2021
c784b16
better error handling for evicted pods (#711)
oavdeev Sep 22, 2021
c8d5610
fixes for pod/job metadata race conditions (#704)
oavdeev Oct 7, 2021
8ab1336
K8S: label value sanitizer (#719)
oavdeev Oct 8, 2021
fb73234
rename name_space to namespace for k8s plugin (#750)
oavdeev Oct 12, 2021
987a6ee
fix k8s attribute handling bug (#753)
oavdeev Oct 12, 2021
f9042d8
tweak k8s test resources (to run on kind) (#754)
oavdeev Oct 12, 2021
f58ae58
add k8s api retries (#756)
oavdeev Oct 13, 2021
b471d12
Merge branch 'master' into plugin-linter
savingoyal Oct 13, 2021
135c9a6
update done marker
savingoyal Oct 13, 2021
ce1e0f7
Merge branch 'plugin-linter' into kubernetes-pr
savingoyal Oct 13, 2021
92c8d25
Use linux binaries in @conda when run in k8s (#758)
sappier Oct 13, 2021
093c7ed
Merge branch 'master' into kubernetes-pr
savingoyal Oct 15, 2021
fb5f558
fix comment
savingoyal Oct 15, 2021
2eb6ba9
fix merge conflict
savingoyal Oct 15, 2021
2bf5303
update char
savingoyal Oct 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion metaflow/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,13 @@ def get_plugin_cli():
# Add new CLI commands in this list
from . import package_cli
from .aws.batch import batch_cli
from .aws.eks import kubernetes_cli
from .aws.step_functions import step_functions_cli

return _ext_plugins.get_plugin_cli() + [
package_cli.cli,
batch_cli.cli,
kubernetes_cli.cli,
step_functions_cli.cli]


Expand All @@ -95,7 +97,9 @@ def _merge_lists(base, overrides, attr):
from .timeout_decorator import TimeoutDecorator
from .environment_decorator import EnvironmentDecorator
from .retry_decorator import RetryDecorator
from .aws.batch.batch_decorator import BatchDecorator, ResourcesDecorator
from .resources_decorator import ResourcesDecorator
from .aws.batch.batch_decorator import BatchDecorator
from .aws.eks.kubernetes_decorator import KubernetesDecorator
from .aws.step_functions.step_functions_decorator \
import StepFunctionsInternalDecorator
from .test_unbounded_foreach_decorator\
Expand All @@ -108,6 +112,7 @@ def _merge_lists(base, overrides, attr):
ResourcesDecorator,
RetryDecorator,
BatchDecorator,
KubernetesDecorator,
StepFunctionsInternalDecorator,
CondaStepDecorator,
InternalTestUnboundedForeachDecorator],
Expand Down
108 changes: 108 additions & 0 deletions metaflow/plugins/aws/aws_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import re
import tarfile

from distutils.dir_util import copy_tree
try:
# python2
from urlparse import urlparse
except: # noqa E722
# python3
from urllib.parse import urlparse

from metaflow import util
from metaflow.datastore import MetaflowDataStore
from metaflow.datastore.local import LocalDataStore
from metaflow.datastore.util.s3util import get_s3_client

def sync_metadata_to_S3(metadata_local_dir, datastore_root, retry_count):
    """
    Pack the local metadata directory into a tarball and upload it to
    Amazon S3.

    The archive is built as `metadata.tgz` inside a temporary directory.
    Its destination URL is `datastore_root` joined with the name returned
    by `MetaflowDataStore.filename_with_attempt_prefix('metadata.tgz',
    retry_count)`; the URL's netloc and its path (leading "/" removed)
    are handed to `upload_fileobj` as the upload target.
    """
    archive_name = 'metadata.tgz'
    with util.TempDir() as scratch_dir:
        archive_path = os.path.join(scratch_dir, archive_name)
        # Write the gzip-compressed archive; leaving the `with` closes it
        # before it is re-opened below for reading.
        with tarfile.open(archive_path, 'w:gz') as archive:
            archive.add(metadata_local_dir)
        # Upload metadata to Amazon S3.
        with open(archive_path, 'rb') as payload:
            client, _ = get_s3_client()
            remote_name = MetaflowDataStore.filename_with_attempt_prefix(
                archive_name, retry_count)
            target = urlparse(os.path.join(datastore_root, remote_name))
            client.upload_fileobj(
                payload, target.netloc, target.path.lstrip('/'))

def sync_metadata_from_S3(metadata_local_dir, datastore_root, retry_count):
    """
    Download the metadata tarball for an attempt from Amazon S3 and copy
    its contents into the local datastore root.

    The object location is `datastore_root` joined with the name returned
    by `MetaflowDataStore.filename_with_attempt_prefix('metadata.tgz',
    retry_count)`. The tarball is downloaded and extracted in a temporary
    directory, and the extracted `metadata_local_dir` tree is then copied
    (with `update=True`) into the directory reported by
    `LocalDataStore.get_datastore_root_from_config`.

    The sync is best effort: exceptions of the type returned as the second
    element of `get_s3_client()` are swallowed silently.
    """
    def _silent(*args, **kwargs):
        # No-op echo callback handed to the local datastore lookup.
        return None

    remote_name = MetaflowDataStore.filename_with_attempt_prefix(
        'metadata.tgz', retry_count)
    source = urlparse(os.path.join(datastore_root, remote_name))
    client, client_error = get_s3_client()
    try:
        bucket = source.netloc
        key = source.path.lstrip('/')
        # Check that the object can be reached before downloading it; a
        # failure of the client's error type ends the sync quietly.
        client.head_object(Bucket=bucket, Key=key)
        with util.TempDir() as scratch_dir:
            archive_path = os.path.join(scratch_dir, 'metadata.tgz')
            with open(archive_path, 'wb') as sink:
                client.download_fileobj(bucket, key, sink)
            with tarfile.open(archive_path, 'r:gz') as archive:
                archive.extractall(scratch_dir)
            extracted_dir = os.path.join(scratch_dir, metadata_local_dir)
            local_root = LocalDataStore.get_datastore_root_from_config(
                _silent)
            copy_tree(extracted_dir, local_root, update=True)
    except client_error:
        # Metadata sync is best effort.
        pass

def get_docker_registry(image_uri):
    r"""
    Return the registry part of a Docker image URI, or None if it has none.

    The URI is matched against a single regular expression with three
    capture groups (registry, repository, tag / digest). Only the registry
    is returned, with its trailing "/" stripped.

    Parameters:
        image_uri - Image reference string, e.g.
                    "example.domain.com/example/image:tag"

    Returns:
        The registry (e.g. "example.domain.com" or "123.123.123.123:123"),
        or None when the URI carries no registry prefix.

    Raises:
        AttributeError - if the pattern does not match at all (e.g. the
                         string has a line break in the middle, which "."
                         does not cross).

    Explanation:
        (.+?(?:[:.].+?)\/)? - [GROUP 0] REGISTRY
            .+?             - A registry must start with at least one character
            (?:[:.].+?)\/   - A registry must have ":" or "." and end with "/"
            ?               - Make a registry optional
        (.*?)               - [GROUP 1] REPOSITORY
            .*?             - Get repository name until separator
        (?:[@:])?           - SEPARATOR
            ?:              - Don't capture separator
            [@:]            - The separator must be either "@" or ":"
            ?               - The separator is optional
        ((?<=[@:]).*)?      - [GROUP 2] TAG / DIGEST
            (?<=[@:])       - A tag / digest must be preceded by "@" or ":"
            .*              - Capture rest of tag / digest
            ?               - A tag / digest is optional

    Examples (captured registry / repository / tag, before the trailing
    "/" of the registry is stripped):
        image
            - None
            - image
            - None
        example/image
            - None
            - example/image
            - None
        example/image:tag
            - None
            - example/image
            - tag
        example.domain.com/example/image:tag
            - example.domain.com/
            - example/image
            - tag
        123.123.123.123:123/example/image:tag
            - 123.123.123.123:123/
            - example/image
            - tag
        example.domain.com/example/image@sha256:45b23dee0
            - example.domain.com/
            - example/image
            - sha256:45b23dee0
    """
    # NOTE: the docstring is a raw string because it quotes the regex
    # escape "\/", which is not a valid escape in a normal string literal.
    pattern = re.compile(r"^(.+?(?:[:.].+?)\/)?(.*?)(?:[@:])?((?<=[@:]).*)?$")
    # Only the first capture group (the registry) is needed here.
    registry = pattern.match(image_uri).groups()[0]
    if registry is None:
        return None
    return registry.rstrip("/")
Loading