Skip to content

Commit

Permalink
Integrate kbatch (#1258)
Browse files Browse the repository at this point in the history
* Initial kbatch-proxy setup

* Fix scchema

* Update kbatch tf module

* Remove kbatch-image var

* Add url to jh service

* Remove local chart

* Fix

* add k8s role for kbatch

* Add name

* Add sa name

* Fix

* Add dask-gateway

* Add kbatch example jobs

* black py file

* Clean up

* Fixes

* clean up

* Update nb
  • Loading branch information
iameskild authored May 26, 2022
1 parent 44aaf37 commit 0a0de22
Show file tree
Hide file tree
Showing 15 changed files with 373 additions and 1 deletion.
3 changes: 3 additions & 0 deletions qhub/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
"monitoring": {
"enabled": True,
},
"kbatch": {
"enabled": True,
},
"cdsdashboards": {
"enabled": True,
"cds_hide_user_named_servers": True,
Expand Down
8 changes: 8 additions & 0 deletions qhub/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ class HelmExtension(Base):
overrides: typing.Optional[typing.Dict]


# ============== kbatch =============


class KBatch(Base):
enabled: bool


# ============== Monitoring =============


Expand Down Expand Up @@ -478,6 +485,7 @@ class Main(Base):
theme: Theme
profiles: Profiles
environments: typing.Dict[str, CondaEnvironment]
kbatch: typing.Optional[KBatch]
monitoring: typing.Optional[Monitoring]
clearml: typing.Optional[ClearML]
tf_extensions: typing.Optional[typing.List[QHubExtension]]
Expand Down
1 change: 1 addition & 0 deletions qhub/stages/input_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def stage_07_kubernetes_services(stage_outputs, config):
"dask-gateway-profiles": config["profiles"]["dask_worker"],
# monitoring
"monitoring-enabled": config["monitoring"]["enabled"],
"kbatch-enabled": config["kbatch"]["enabled"],
# prefect
"prefect-enabled": config.get("prefect", {}).get("enabled", False),
"prefect-token": config.get("prefect", {}).get("token", ""),
Expand Down
5 changes: 4 additions & 1 deletion qhub/template/stages/07-kubernetes-services/jupyterhub.tf
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ module "jupyterhub" {

services = concat([
"dask-gateway"
], (var.prefect-enabled ? ["prefect"] : []))
],
(var.prefect-enabled ? ["prefect"] : []),
(var.kbatch-enabled ? ["kbatch"] : [])
)

general-node-group = var.node_groups.general
user-node-group = var.node_groups.user
Expand Down
23 changes: 23 additions & 0 deletions qhub/template/stages/07-kubernetes-services/kbatch.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ======================= VARIABLES ======================
variable "kbatch-enabled" {
description = "kbatch enabled or disabled"
type = bool
}


# ====================== RESOURCES =======================
module "kbatch" {
count = var.kbatch-enabled ? 1 : 0

source = "./modules/kubernetes/services/kbatch"

namespace = var.environment
external-url = var.endpoint

jupyterhub_api_token = module.jupyterhub.services.kbatch.api_token
node-group = var.node_groups.user

dask-gateway-address = module.dask-gateway.config.gateway.address
dask-gateway-proxy-address = module.dask-gateway.config.gateway.proxy_address
dask-worker-image = var.dask-worker-image
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: dask-gateway-job
image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0
args:
- python
- kbatch_dask_gateway_test.py
code: kbatch_dask_gateway_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: nb-job
image: mcr.microsoft.com/planetary-computer/python:latest
args:
- papermill
- kbatch_nb_test.ipynb
code: kbatch_nb_test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
Start a cluster with Dask Gateway, print the dashboard link, and run some tasks.
"""
import dask
import dask_gateway
import distributed
from distributed import wait


def inc(x):
return x + 1


def main():
print(f"dask version = {dask.__version__}")
print(f"dask_gateway version = {dask_gateway.__version__}")
print(f"distributed version = {distributed.__version__}")

gateway = dask_gateway.Gateway()
options = gateway.cluster_options(use_local_defaults=False)

print("Starting cluster")
cluster = gateway.new_cluster(options)
client = cluster.get_client()
print("Dashboard at:", client.dashboard_link)

cluster.scale(2)

futures = client.map(inc, list(range(100)))
_ = wait(futures)

print("Closing cluster")
cluster.close()


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: dask-gateway-job
image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0
args:
- python
- kbatch_dask_gateway.py
code: kbatch_dask_gateway.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "cba309a0-d8ff-4c18-8d94-c380e875ac46",
"metadata": {
"tags": []
},
"source": [
"# Canonical Tests\n",
"\n",
"The goal of the work we performed on behalf of OGC/USGS was to enable users on QHub (and perhaps plain JupyterHub) to:\n",
"- [ ] run long-running notebooks or scripts\n",
"- [ ] run notebooks and scripts as cronjobs\n",
"\n",
"And as a stretch goal:\n",
"- [ ] run complex workflows that require multiple steps / noteboks or scripts\n",
"\n",
"Additional requirements:\n",
"- the notebook or script should work even after the user's JupyterLab session ends\n",
"- the notebook or script can connect to the Dask-Gateway and launch a Dask cluster\n",
"\n",
"This notebooks will serve as a \"unit test\" for the above features, for more details [see this issue](https://github.com/Quansight/ogc-management/issues/6).\n",
"\n",
"## `kbatch`\n",
"\n",
"The first two features outline above will be handled by [`kbatch`](https://github.com/kbatch-dev/kbatch). `kbatch` consists of two major components, the frontend `kbatch` and backend `kbatch-proxy`. The user submits job requests to `kbatch-proxy` and `kbatch-proxy` submits those job requests to the Kubernetes API. \n",
"\n",
"> NOTE:\n",
"> At the present, no additional features have been added to `kbatch`, we have simply integrated `kbatch-proxy` into QHub. A feature enhancement PR will need to be opened on the `kbatch` repo in order to enable cronjobs.\n",
"\n",
"### Setup\n",
"\n",
"In order to use `kbatch` in it's current form, some basic setup is required of the user. Going forward, we will assume that `kbatch-proxy` has been correctly integrated into QHub.\n",
"\n",
"1. Create or modify a conda environment by adding `kbatch`. And activate this conda environment.\n",
"\n",
"```\n",
"pip install kbatch\n",
"```\n",
"\n",
"2. Create a JupyterHub API token and configure `kbatch` to talk to the `kbatch-proxy` service.\n",
"\n",
"```\n",
"kbatch configure --kbatch-url http://kbatch-kbatch-proxy.dev.svc.cluster.local --token <JUPYTERHUB_API_TOKEN>\n",
"```\n",
"\n",
"3. Submit a job to `kbatch`\n",
"\n",
"```\n",
"kbatch job submit --name=list-files --image=alpine --command='[\"ls\", \"-lh\"]'\n",
"```\n",
"\n",
"### Run this notebook\n",
"\n",
"To run this notebook as a job, you will need an image with `papermill` (or a similar CLI tool). \n",
"\n",
"Create a configuration file, `kbatch_nb_job.yaml` like the one below:\n",
"```yaml\n",
"# filename: kbatch_nb_job.yaml\n",
"name: nb-job\n",
"image: mcr.microsoft.com/planetary-computer/python:latest\n",
"args:\n",
" - papermill\n",
" - kbatch_nb.ipynb\n",
"code: kbatch_nb.ipynb\n",
"```\n",
"\n",
"Then run:\n",
"```\n",
"kbatch job submit -f kbatch_nb_job.yaml\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ff3e90f6-f0b6-4300-81c2-b7ade75d57b4",
"metadata": {},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "277ff7d7-d938-4790-af92-5719534b08d5",
"metadata": {},
"outputs": [],
"source": [
"with open('kbatch_nb_output.txt', 'w') as f:\n",
" for i in range(0,10):\n",
" current_time = time.strftime(\"%Y-%m-%d-%H:%M:%S\", time.localtime())\n",
" time.sleep(1)\n",
" f.write(f'{current_time}: {i}\\n')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: nb-job
image: mcr.microsoft.com/planetary-computer/python:latest
args:
- papermill
- kbatch_nb.ipynb
code: kbatch_nb.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Get this name dynamtically
locals {
kbatch_service_account_name = "kbatch-kbatch-proxy"
}

resource "helm_release" "kbatch" {
name = "kbatch"
namespace = var.namespace
repository = "https://kbatch-dev.github.io/helm-chart"
chart = "kbatch-proxy"
version = "0.3.1"

values = concat([
file("${path.module}/values.yaml"),
jsonencode({
app = {
jupyterhub_api_token = var.jupyterhub_api_token
jupyterhub_api_url = "https://${var.external-url}/hub/api/"
extra_env = {
KBATCH_PREFIX = ""
KBATCH_JOB_EXTRA_ENV = jsonencode({
DASK_GATEWAY__AUTH__TYPE = "jupyterhub"
DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE = "${var.dask-worker-image.name}:${var.dask-worker-image.tag}"
DASK_GATEWAY__ADDRESS = "${var.dask-gateway-address}"
DASK_GATEWAY__PROXY_ADDRESS = "${var.dask-gateway-proxy-address}"
})
}
}
image = {
tag = "0.3.1"
}
})
])

set_sensitive {
name = "jupyterHubToken"
value = var.jupyterhub_api_token
}

set {
name = "kbatchImage"
value = var.image
}

set {
name = "namespace"
value = var.namespace
}

}

resource "kubernetes_cluster_role" "kbatch" {
metadata {
name = "${var.name}-kbatch"
}

rule {
api_groups = ["", "batch"]
resources = ["*"]
verbs = ["get", "watch", "list", "patch", "create"]
}
}


resource "kubernetes_cluster_role_binding" "kbatch" {
metadata {
name = "${var.name}-kbatch"
}

role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.kbatch.metadata.0.name
}
subject {
kind = "ServiceAccount"
name = local.kbatch_service_account_name
namespace = var.namespace
api_group = ""
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# https://github.com/kbatch-dev/helm-chart/blob/main/kbatch/values.yaml
Loading

0 comments on commit 0a0de22

Please sign in to comment.