Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for scaling but non-rolling deploys #26

Merged
merged 2 commits into from
Mar 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docs/deploying.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,19 @@ before starting the new one, resulting in a downtime of a few seconds
This way, there is no risk of multiple containers writing to
the same data at the same time.

If `scaling` is given and larger than zero (so also when 1), a
If `scale` is given and larger than zero (so also when 1), a
zero-downtime deployment is possible, because the new containers will be
started and given time to start up before the old containers are
stopped. Note that in this case MyPaas assumes that the container is ready
within 5s. You probably also want to specify `healthcheck` so that
Traefik will not use a container before it is ready.

If `scale` contains the word 'safe', the deployment is non-rolling. All old
containers are stopped before the new ones are started, which prevents a
client from obtaining a mix of old and new assets (especially important for
apps that use a caching service worker). For symmetry, the word 'roll' can be
used for rolling deploys.

### mypaas.healthcheck

A value consisting of three parts, e.g. "/status 10s 2s", representing
Expand Down
74 changes: 72 additions & 2 deletions mypaas/server/_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def get_deploy_generator(deploy_dir):
port = 80
portmaps = []
scale = None
scale_option = "roll"
urls = []
volumes = []
envvars = {}
Expand Down Expand Up @@ -98,6 +99,10 @@ def get_deploy_generator(deploy_dir):
elif key == "mypaas.publish":
portmaps.append(val)
elif key == "mypaas.scale":
for opt in ("safe", "roll"):
if opt in val:
scale_option = opt
val = val.replace(opt, "").strip()
scale = int(val)
elif key == "mypaas.healthcheck":
parts = val.split()
Expand Down Expand Up @@ -243,7 +248,14 @@ def label(x):

# Deploy!
if scale and scale > 0:
return _deploy_scale(container_infos, deploy_dir, service_name, cmd, scale)
if scale_option == "roll":
return _deploy_scale_roll(
container_infos, deploy_dir, service_name, cmd, scale
)
else:
return _deploy_scale_safe(
container_infos, deploy_dir, service_name, cmd, scale
)
else:
return _deploy_no_scale(container_infos, deploy_dir, service_name, cmd)

Expand Down Expand Up @@ -301,7 +313,7 @@ def _deploy_no_scale(container_infos, deploy_dir, service_name, prepared_cmd):
yield f"done deploying {service_name}"


def _deploy_scale(container_infos, deploy_dir, service_name, prepared_cmd, scale):
def _deploy_scale_safe(container_infos, deploy_dir, service_name, prepared_cmd, scale):
image_name = clean_name(service_name, ".-:/")
base_container_name = clean_name(image_name, ".-")

Expand All @@ -315,6 +327,64 @@ def _deploy_scale(container_infos, deploy_dir, service_name, prepared_cmd, scale
old_ids = get_id_name_for_this_service(container_infos)
unique = str(int(time.time()))

yield f"renaming {len(old_ids)} current containers"
for i, id in enumerate(old_ids.keys()):
try:
dockercall("rename", id, base_container_name + f".old.{unique}.{i+1}")
except Exception:
yield "Rename failed. Probably a crashed container -> removing!"
dockercall("rm", "-f", id, fail_ok=True)

for id, name in old_ids.items():
yield f"stopping container (was {name})"
dockercall("stop", id, fail_ok=True)

# Keep track of started containers, in case we must shut them down
new_pool = []

try:
for i in range(scale):
new_name = f"{base_container_name}.{i+1}"
yield f"starting new container {new_name}"
new_pool.append(new_name)
cmd = prepared_cmd.copy()
cmd.append(f"--env=MYPAAS_CONTAINER={new_name}")
cmd.extend([f"--name={new_name}", image_name])
dockercall(*cmd)
except Exception:
yield "fail -> recovering"
for name in new_pool:
dockercall("stop", name, fail_ok=True)
dockercall("rm", name, fail_ok=True)
for id, name in old_ids.items():
dockercall("rename", id, name, fail_ok=True)
dockercall("start", id, fail_ok=True)
raise
else:
yield f"removing {len(old_ids)} old containers"
for id in old_ids.keys():
dockercall("rm", id, fail_ok=True)

yield "pruning"
dockercall("container", "prune", "--force")
dockercall("image", "prune", "--force")
yield f"done deploying {service_name}"


def _deploy_scale_roll(container_infos, deploy_dir, service_name, prepared_cmd, scale):
image_name = clean_name(service_name, ".-:/")
base_container_name = clean_name(image_name, ".-")

yield ""
yield f"rolling deploy of {service_name} to containers {base_container_name}.1..{scale}"
time.sleep(1)

yield "building image"
dockercall("build", "-t", image_name, deploy_dir)

old_ids = get_id_name_for_this_service(container_infos)
unique = str(int(time.time()))

yield f"renaming {len(old_ids)} current containers (and wait 2s)"
for i, id in enumerate(old_ids.keys()):
try:
Expand Down
2 changes: 2 additions & 0 deletions mypaas/server/_traefik.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def init_router():
middlewares = ["auth"]
[http.routers.api.tls]
certresolver = "default"
options= "intermediate"

# The routing for mypaas daemon
[http.routers.mypaas-daemon-router]
Expand All @@ -147,6 +148,7 @@ def init_router():
service = "mypaas-daemon"
[http.routers.mypaas-daemon-router.tls]
certresolver = "default"
options= "intermediate"
[http.services.mypaas-daemon.loadBalancer]
[[http.services.mypaas-daemon.loadBalancer.servers]]
url = "http://127.0.0.1:88"
Expand Down