# load the config object (satisfies linters)
c = get_config() # noqa
import glob
import os
import re
import sys
from jupyterhub.utils import url_path_join
from kubernetes_asyncio import client
from tornado.httpclient import AsyncHTTPClient
# Make sure that modules placed in the same directory as the jupyterhub config are added to the pythonpath
configuration_directory = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, configuration_directory)
from z2jh import (
    get_config,
    get_name,
    get_name_env,
    get_secret_value,
    set_config_if_not_none,
)
def camelCaseify(s):
    """convert snake_case to camelCase

    For the common case where some_value is set from someValue
    so we don't have to specify the name twice.
    """
    return re.sub(r"_([a-z])", lambda m: m.group(1).upper(), s)
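# A quick sanity check of the mapping (illustrative):
#
#   >>> camelCaseify("extra_pod_config")
#   'extraPodConfig'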
# Configure JupyterHub to use the curl backend for making HTTP requests,
# rather than the pure-python implementation, which becomes too slow when
# issuing requests to the proxy API at the rate required.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
c.JupyterHub.spawner_class = "kubespawner.KubeSpawner"
# Connect to a proxy running in a different pod. Note that *_SERVICE_*
# environment variables are set by Kubernetes for Services
c.ConfigurableHTTPProxy.api_url = (
    f'http://{get_name("proxy-api")}:{get_name_env("proxy-api", "_SERVICE_PORT")}'
)
c.ConfigurableHTTPProxy.should_start = False
# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False
# Check that the proxy has routes appropriately set up
c.JupyterHub.last_activity_interval = 60
# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
    "slow_spawn_timeout": 0,
}
# configure the hub db connection
db_type = get_config("hub.db.type")
if db_type == "sqlite-pvc":
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")

db_password = get_secret_value("hub.db.password", None)
if db_password is not None:
    if db_type == "mysql":
        os.environ["MYSQL_PWD"] = db_password
    elif db_type == "postgres":
        os.environ["PGPASSWORD"] = db_password
    else:
        print(f"Warning: hub.db.password is ignored for hub.db.type={db_type}")
# c.JupyterHub configuration from Helm chart's configmap
for trait, cfg_key in (
    ("concurrent_spawn_limit", None),
    ("active_server_limit", None),
    ("base_url", None),
    ("allow_named_servers", None),
    ("named_server_limit_per_user", None),
    ("authenticate_prometheus", None),
    ("redirect_to_server", None),
    ("shutdown_on_logout", None),
    ("template_paths", None),
    ("template_vars", None),
):
    if cfg_key is None:
        cfg_key = camelCaseify(trait)
    set_config_if_not_none(c.JupyterHub, trait, "hub." + cfg_key)
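# A None cfg_key means the Helm value is just the camelCased trait name:
# e.g. the entry ("allow_named_servers", None) resolves to hub.allowNamedServers
# and, if set, is applied to c.JupyterHub.allow_named_servers.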
# hub_bind_url configures what the JupyterHub process within the hub pod's
# container should listen to.
hub_container_port = 8081
c.JupyterHub.hub_bind_url = f"http://:{hub_container_port}"
# hub_connect_url is the URL for connecting to the hub for use by external
# JupyterHub services such as the proxy. Note that *_SERVICE_* environment
# variables are set by Kubernetes for Services.
c.JupyterHub.hub_connect_url = (
    f'http://{get_name("hub")}:{get_name_env("hub", "_SERVICE_PORT")}'
)
# implement common labels
# This mimics the jupyterhub.commonLabels helper, but declares managed-by as
# kubespawner instead of helm.
#
# The app and release labels are legacy labels slated for removal in z2jh 5,
# but retained for now to avoid a breaking change in z2jh 4 that would force
# user server restarts: NetworkPolicy resources select pods via labels that
# must be present on both old and new pods, and we want these resources to
# keep functioning for old and new user server pods during an upgrade.
#
common_labels = c.KubeSpawner.common_labels = {}
common_labels["app.kubernetes.io/name"] = common_labels["app"] = get_config(
    "nameOverride",
    default=get_config("Chart.Name", "jupyterhub"),
)
release = get_config("Release.Name")
if release:
    common_labels["app.kubernetes.io/instance"] = common_labels["release"] = release
chart_name = get_config("Chart.Name")
chart_version = get_config("Chart.Version")
if chart_name and chart_version:
    common_labels["helm.sh/chart"] = common_labels["chart"] = (
        f"{chart_name}-{chart_version.replace('+', '_')}"
    )
common_labels["app.kubernetes.io/managed-by"] = "kubespawner"
c.KubeSpawner.namespace = os.environ.get("POD_NAMESPACE", "default")
# Max number of consecutive failures before the Hub restarts itself
set_config_if_not_none(
    c.Spawner,
    "consecutive_failure_limit",
    "hub.consecutiveFailureLimit",
)
for trait, cfg_key in (
    ("pod_name_template", None),
    ("start_timeout", None),
    ("image_pull_policy", "image.pullPolicy"),
    # ('image_pull_secrets', 'image.pullSecrets'), # Managed manually below
    ("events_enabled", "events"),
    ("extra_labels", None),
    ("extra_annotations", None),
    # ("allow_privilege_escalation", None), # Managed manually below
    ("uid", None),
    ("fs_gid", None),
    ("service_account", "serviceAccountName"),
    ("storage_extra_labels", "storage.extraLabels"),
    # ("tolerations", "extraTolerations"), # Managed manually below
    ("node_selector", None),
    ("node_affinity_required", "extraNodeAffinity.required"),
    ("node_affinity_preferred", "extraNodeAffinity.preferred"),
    ("pod_affinity_required", "extraPodAffinity.required"),
    ("pod_affinity_preferred", "extraPodAffinity.preferred"),
    ("pod_anti_affinity_required", "extraPodAntiAffinity.required"),
    ("pod_anti_affinity_preferred", "extraPodAntiAffinity.preferred"),
    ("lifecycle_hooks", None),
    ("init_containers", None),
    ("extra_containers", None),
    ("mem_limit", "memory.limit"),
    ("mem_guarantee", "memory.guarantee"),
    ("cpu_limit", "cpu.limit"),
    ("cpu_guarantee", "cpu.guarantee"),
    ("extra_resource_limits", "extraResource.limits"),
    ("extra_resource_guarantees", "extraResource.guarantees"),
    ("environment", "extraEnv"),
    ("profile_list", None),
    ("extra_pod_config", None),
):
    if cfg_key is None:
        cfg_key = camelCaseify(trait)
    set_config_if_not_none(c.KubeSpawner, trait, "singleuser." + cfg_key)
image = get_config("singleuser.image.name")
if image:
    tag = get_config("singleuser.image.tag")
    if tag:
        image = f"{image}:{tag}"
    c.KubeSpawner.image = image
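# For example (illustrative values), singleuser.image.name
# "quay.io/jupyterhub/k8s-singleuser-sample" with singleuser.image.tag "4.0.0"
# yields c.KubeSpawner.image = "quay.io/jupyterhub/k8s-singleuser-sample:4.0.0".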
# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since it's a
# property where None, False, and True are all valid values that users of the
# Helm chart may want to set, we can't use the set_config_if_not_none helper
# function, as someone may want to override the default False value with None.
#
c.KubeSpawner.allow_privilege_escalation = get_config(
    "singleuser.allowPrivilegeEscalation"
)
# Combine imagePullSecret.create (single), imagePullSecrets (list), and
# singleuser.image.pullSecrets (list).
image_pull_secrets = []
if get_config("imagePullSecret.automaticReferenceInjection") and get_config(
    "imagePullSecret.create"
):
    image_pull_secrets.append(get_name("image-pull-secret"))
if get_config("imagePullSecrets"):
    image_pull_secrets.extend(get_config("imagePullSecrets"))
if get_config("singleuser.image.pullSecrets"):
    image_pull_secrets.extend(get_config("singleuser.image.pullSecrets"))
if image_pull_secrets:
    c.KubeSpawner.image_pull_secrets = image_pull_secrets
# scheduling:
if get_config("scheduling.userScheduler.enabled"):
    c.KubeSpawner.scheduler_name = get_name("user-scheduler")
if get_config("scheduling.podPriority.enabled"):
    c.KubeSpawner.priority_class_name = get_name("priority")
# add node-purpose affinity
match_node_purpose = get_config("scheduling.userPods.nodeAffinity.matchNodePurpose")
if match_node_purpose:
    node_selector = dict(
        matchExpressions=[
            dict(
                key="hub.jupyter.org/node-purpose",
                operator="In",
                values=["user"],
            )
        ],
    )
    if match_node_purpose == "prefer":
        c.KubeSpawner.node_affinity_preferred.append(
            dict(
                weight=100,
                preference=node_selector,
            ),
        )
    elif match_node_purpose == "require":
        c.KubeSpawner.node_affinity_required.append(node_selector)
    elif match_node_purpose == "ignore":
        pass
    else:
        raise ValueError(
            f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
        )
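# With matchNodePurpose "prefer", the user pod ends up with a preferred node
# affinity term roughly like (illustrative rendering):
#
#   preferredDuringSchedulingIgnoredDuringExecution:
#     - weight: 100
#       preference:
#         matchExpressions:
#           - key: hub.jupyter.org/node-purpose
#             operator: In
#             values: [user]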
# Combine the common tolerations for user pods with singleuser tolerations
scheduling_user_pods_tolerations = get_config("scheduling.userPods.tolerations", [])
singleuser_extra_tolerations = get_config("singleuser.extraTolerations", [])
tolerations = scheduling_user_pods_tolerations + singleuser_extra_tolerations
if tolerations:
    c.KubeSpawner.tolerations = tolerations
# Configure dynamic provisioning of PVCs
storage_type = get_config("singleuser.storage.type")
if storage_type == "dynamic":
    pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
    if pvc_name_template:
        c.KubeSpawner.pvc_name_template = pvc_name_template
    volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
    c.KubeSpawner.storage_pvc_ensure = True
    set_config_if_not_none(
        c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
    )
    set_config_if_not_none(
        c.KubeSpawner,
        "storage_access_modes",
        "singleuser.storage.dynamic.storageAccessModes",
    )
    set_config_if_not_none(
        c.KubeSpawner, "storage_capacity", "singleuser.storage.capacity"
    )

    # Add volumes to singleuser pods
    c.KubeSpawner.volumes = [
        {
            "name": volume_name_template,
            "persistentVolumeClaim": {"claimName": "{pvc_name}"},
        }
    ]
    c.KubeSpawner.volume_mounts = [
        {
            "mountPath": get_config("singleuser.storage.homeMountPath"),
            "name": volume_name_template,
            "subPath": get_config("singleuser.storage.dynamic.subPath"),
        }
    ]
elif storage_type == "static":
    pvc_claim_name = get_config("singleuser.storage.static.pvcName")
    c.KubeSpawner.volumes = [
        {"name": "home", "persistentVolumeClaim": {"claimName": pvc_claim_name}}
    ]
    c.KubeSpawner.volume_mounts = [
        {
            "mountPath": get_config("singleuser.storage.homeMountPath"),
            "name": "home",
            "subPath": get_config("singleuser.storage.static.subPath"),
        }
    ]
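# A hypothetical values.yaml snippet for the static branch above, mounting one
# shared, pre-existing PVC with a per-user subPath:
#
#   singleuser:
#     storage:
#       type: static
#       static:
#         pvcName: shared-home
#         subPath: "home/{username}"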
# Inject singleuser.extraFiles as volumes and volumeMounts with data loaded from
# the dedicated k8s Secret prepared to hold the actual extraFiles content.
extra_files = get_config("singleuser.extraFiles", {})
if extra_files:
    volume = {
        "name": "files",
    }
    items = []
    for file_key, file_details in extra_files.items():
        # Each item is a mapping of a key in the k8s Secret to a path in this
        # abstract volume. The goal is only to enable setting the mode /
        # permissions, so we don't change the key-to-path mapping.
        item = {
            "key": file_key,
            "path": file_key,
        }
        if "mode" in file_details:
            item["mode"] = file_details["mode"]
        items.append(item)
    volume["secret"] = {
        "secretName": get_name("singleuser"),
        "items": items,
    }
    c.KubeSpawner.volumes.append(volume)

    volume_mounts = []
    for file_key, file_details in extra_files.items():
        volume_mounts.append(
            {
                "mountPath": file_details["mountPath"],
                "subPath": file_key,
                "name": "files",
            }
        )
    c.KubeSpawner.volume_mounts.extend(volume_mounts)
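# A hypothetical singleuser.extraFiles entry:
#
#   singleuser:
#     extraFiles:
#       my-config:
#         mountPath: /etc/jupyter/jupyter_notebook_config.json
#         mode: 0o444
#
# is mounted from the chart managed Secret's "my-config" key via the shared
# "files" volume declared above.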
# Inject extraVolumes / extraVolumeMounts
c.KubeSpawner.volumes.extend(get_config("singleuser.storage.extraVolumes", []))
c.KubeSpawner.volume_mounts.extend(
    get_config("singleuser.storage.extraVolumeMounts", [])
)
c.JupyterHub.services = []
c.JupyterHub.load_roles = []
# jupyterhub-idle-culler's permissions are scoped to only what it needs, see
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
#
if get_config("cull.enabled", False):
    jupyterhub_idle_culler_role = {
        "name": "jupyterhub-idle-culler",
        "scopes": [
            "list:users",
            "read:users:activity",
            "read:servers",
            "delete:servers",
            # "admin:users", # dynamically added if --cull-users is passed
        ],
        # assign the role to a jupyterhub service, so it gains these permissions
        "services": ["jupyterhub-idle-culler"],
    }

    cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
    base_url = c.JupyterHub.get("base_url", "/")
    cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))

    cull_timeout = get_config("cull.timeout")
    if cull_timeout:
        cull_cmd.append(f"--timeout={cull_timeout}")

    cull_every = get_config("cull.every")
    if cull_every:
        cull_cmd.append(f"--cull-every={cull_every}")

    cull_concurrency = get_config("cull.concurrency")
    if cull_concurrency:
        cull_cmd.append(f"--concurrency={cull_concurrency}")

    if get_config("cull.users"):
        cull_cmd.append("--cull-users")
        jupyterhub_idle_culler_role["scopes"].append("admin:users")

    if not get_config("cull.adminUsers"):
        cull_cmd.append("--cull-admin-users=false")

    if get_config("cull.removeNamedServers"):
        cull_cmd.append("--remove-named-servers")

    cull_max_age = get_config("cull.maxAge")
    if cull_max_age:
        cull_cmd.append(f"--max-age={cull_max_age}")

    c.JupyterHub.services.append(
        {
            "name": "jupyterhub-idle-culler",
            "command": cull_cmd,
        }
    )
    c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)
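# With cull.timeout=3600 and cull.every=300 (illustrative values) and the
# default base_url "/", the assembled service command is roughly:
#
#   python3 -m jupyterhub_idle_culler \
#       --url=http://localhost:8081/hub/api --timeout=3600 --cull-every=300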
for key, service in get_config("hub.services", {}).items():
    # c.JupyterHub.services is a list of dicts, but
    # hub.services is a dict of dicts to make the config mergeable
    service.setdefault("name", key)

    # As the api_token could be exposed in hub.existingSecret, we need to read
    # it from there, or fall back to the chart managed k8s Secret's value.
    service.pop("apiToken", None)
    service["api_token"] = get_secret_value(f"hub.services.{key}.apiToken")

    c.JupyterHub.services.append(service)
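# A hypothetical hub.services entry:
#
#   hub:
#     services:
#       my-service:
#         url: http://my-service:8080
#
# yields {"name": "my-service", "url": "http://my-service:8080",
# "api_token": <value read from the k8s Secret>}.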
for key, role in get_config("hub.loadRoles", {}).items():
    # c.JupyterHub.load_roles is a list of dicts, but
    # hub.loadRoles is a dict of dicts to make the config mergeable
    role.setdefault("name", key)
    c.JupyterHub.load_roles.append(role)
# respect explicit null command (distinct from unspecified)
# this avoids relying on KubeSpawner.cmd's default being None
_unspecified = object()
specified_cmd = get_config("singleuser.cmd", _unspecified)
if specified_cmd is not _unspecified:
    c.Spawner.cmd = specified_cmd
set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")
cloud_metadata = get_config("singleuser.cloudMetadata")
if cloud_metadata.get("blockWithIptables") == True:
    # Use iptables to block access to cloud metadata by default
    network_tools_image_name = get_config("singleuser.networkTools.image.name")
    network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
    network_tools_resources = get_config("singleuser.networkTools.resources")
    ip = cloud_metadata["ip"]
    ip_block_container = client.V1Container(
        name="block-cloud-metadata",
        image=f"{network_tools_image_name}:{network_tools_image_tag}",
        command=[
            "iptables",
            "--append",
            "OUTPUT",
            "--protocol",
            "tcp",
            "--destination",
            ip,
            "--destination-port",
            "80",
            "--jump",
            "DROP",
        ],
        security_context=client.V1SecurityContext(
            privileged=True,
            run_as_user=0,
            capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
        ),
        resources=network_tools_resources,
    )
    c.KubeSpawner.init_containers.append(ip_block_container)
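# With the link-local metadata address 169.254.169.254 as the configured ip
# (illustrative), the privileged init container effectively runs:
#
#   iptables --append OUTPUT --protocol tcp --destination 169.254.169.254 \
#       --destination-port 80 --jump DROP
#
# dropping outbound traffic to the cloud metadata endpoint on port 80.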
if get_config("debug.enabled", False):
    c.JupyterHub.log_level = "DEBUG"
    c.Spawner.debug = True
# load potentially seeded secrets
#
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
# that is set using the chart managed secret.
c.JupyterHub.cookie_secret = get_secret_value("hub.config.JupyterHub.cookie_secret")
# NOTE: CryptKeeper.keys should be a list of strings, but we have encoded it as
# a single string joined with ";" in the k8s Secret.
#
c.CryptKeeper.keys = get_secret_value("hub.config.CryptKeeper.keys").split(";")
# load hub.config values, except potentially seeded secrets already loaded
for app, cfg in get_config("hub.config", {}).items():
    if app == "JupyterHub":
        cfg.pop("proxy_auth_token", None)
        cfg.pop("cookie_secret", None)
        cfg.pop("services", None)
    elif app == "ConfigurableHTTPProxy":
        cfg.pop("auth_token", None)
    elif app == "CryptKeeper":
        cfg.pop("keys", None)
    c[app].update(cfg)
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
if os.path.isdir(config_dir):
    for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
        file_name = os.path.basename(file_path)
        print(f"Loading {config_dir} config: {file_name}")
        with open(file_path) as f:
            file_content = f.read()
        # compiling makes debugging easier: https://stackoverflow.com/a/437857
        exec(compile(source=file_content, filename=file_name, mode="exec"))
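# Files execute in lexicographic order, so numeric prefixes can be used to
# order them, e.g. (hypothetical names) 00-base.py runs before 10-override.py.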
# execute hub.extraConfig entries
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
    print(f"Loading extra config: {key}")
    exec(config_py)
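# A hypothetical hub.extraConfig entry executed by the loop above:
#
#   hub:
#     extraConfig:
#       myConfig: |
#         c.Spawner.start_timeout = 600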