Hello everyone,
I want to set up a Jupyterhub platform based on Docker Swarm, with an NFS mount to access user folders.
I had already received help on this forum for implementing such a solution ( Get username with dockerspawner - #6 by mahendrapaipuri ). It used LTI authentication, which I now want to replace with CAS authentication.
Currently, the users and their home directories are created correctly, with the correct UIDs. However, when the notebook container is generated, it starts with Jovyan’s default UID, which prevents it from accessing the user folder.
This problem did not arise in the LTI version because the UID was obtained by a calculation directly in the MySwarmSpawner class, which overrides SwarmSpawner. In this new scenario, I absolutely must retrieve the UID that was assigned when the user was created in the HUB container before passing it as an argument to the notebook container in order to replace Jovyan’s UID.
Here’s my code:
import os
import shutil
import pwd
import logging
import sys
from dockerspawner import SwarmSpawner
from jhub_cas_authenticator.cas_auth import CASLocalAuthenticator
# The proxy runs in another container: the Hub must not start its own
# configurable-http-proxy and instead talks to the existing one over its API.
c.ConfigurableHTTPProxy.should_start = False
c.ConfigurableHTTPProxy.api_url = 'http://proxy:8001'
# Ask the local authenticator to create missing system users.
c.LocalAuthenticator.create_system_users = True
# Placeholder secret used for the Hub cookies.
# NOTE(review): presumably replaced by a real random secret in production — confirm.
c.JupyterHub.cookie_secret = 'My_token'
# Login cookies are valid for one day.
c.JupyterHub.cookie_max_age_days = 1
class MySwarmSpawner(SwarmSpawner):
    """SwarmSpawner that exports the Hub-side UID/GID of ``jupyter-<user>``.

    The numeric ids are looked up in the Hub container's passwd database and
    passed to the single-user server as ``NB_UID`` / ``NB_GID``.

    NOTE(review): the Jupyter Docker Stacks start script is documented to apply
    ``NB_UID`` / ``NB_GID`` only when the container starts as root — confirm
    that the service's container spec runs the container as root.
    """

    def get_env(self):
        """Return the single-user environment with ``NB_UID``/``NB_GID`` added.

        ``load_user_env`` is not called by anything else in this file, so the
        environment built by the parent class is routed through it here.

        Returns:
            dict: the parent environment plus ``NB_UID`` and ``NB_GID``.

        Raises:
            KeyError: if the ``jupyter-<user>`` account does not exist.
        """
        return self.load_user_env(super().get_env())

    def load_user_env(self, env):
        """Add the ``NB_UID`` and ``NB_GID`` of ``jupyter-<user>`` to ``env``.

        Args:
            env: environment mapping to update in place.

        Returns:
            The same ``env`` mapping, with ``NB_UID`` and ``NB_GID`` set to the
            account's uid/gid as strings.

        Raises:
            KeyError: if the account is missing from the passwd database (the
                error is logged and re-raised).
        """
        username = 'jupyter-' + self.user.name
        try:
            pw_record = pwd.getpwnam(username)
        except KeyError:
            self.log.error(f"user {username} not found on the system.")
            raise
        uid = pw_record.pw_uid
        gid = pw_record.pw_gid
        self.log.info(f"set up of NB_UID={uid} and NB_GID={gid} for user {username}")
        # Environment values must be strings.
        env['NB_UID'] = str(uid)
        env['NB_GID'] = str(gid)
        return env
# Use the custom spawner class for every single-user server.
c.JupyterHub.spawner_class = MySwarmSpawner
# Stop the user's servers when they log out.
c.JupyterHub.shutdown_on_logout = True
c.JupyterHub.init_spawners_timeout = 60
# NOTE(review): this option is set on SystemUserSpawner, while the spawner
# class configured above is MySwarmSpawner (a SwarmSpawner subclass) —
# confirm that this setting is actually picked up.
c.SystemUserSpawner.name_template = '{prefix}-{username}-{servername}'
# Named servers: each user may run up to two in addition to the default one.
c.JupyterHub.allow_named_servers = True
c.JupyterHub.named_server_limit_per_user = 2
# Verbose logging.
c.JupyterHub.log_level = logging.DEBUG
# Leave single-user servers running when the Hub itself shuts down.
c.JupyterHub.cleanup_servers = False
def pre_spawn_hook(spawner):
    """
    Prepare the Hub-side account and home directory before a server starts.

    Steps, in order:
      1. make sure the system user ``jupyter-<name>`` exists (created with
         ``useradd -m`` when missing);
      2. read its UID/GID from the passwd database;
      3. create ``/home/jupyter-<name>`` when missing and seed it from
         ``/home_src/jovyan/`` if that skeleton exists;
      4. reset ownership and permissions on the whole home tree;
      5. inject NB_USER, NB_UID and NB_GID into ``spawner.environment``.

    Args:
        spawner: the spawner about to start; ``spawner.user.name`` is read and
            ``spawner.environment`` is updated in place.

    Raises:
        ValueError: if the account still cannot be found after the creation
            attempt.
    """
    import subprocess  # local import: only this hook needs it

    username = spawner.user.name
    jupyter_username = 'jupyter-' + username
    volume_path = os.path.join('/home', jupyter_username)

    # Create the account on first use.
    try:
        pwd.getpwnam(jupyter_username)
    except KeyError:
        # Pass an argument list (no shell): the name comes from the
        # authenticator and must never be interpreted by a shell.
        try:
            result = subprocess.run(['useradd', '-m', jupyter_username], check=False)
        except OSError as err:
            spawner.log.warning("could not run useradd for %s: %s", jupyter_username, err)
        else:
            if result.returncode != 0:
                spawner.log.warning(
                    "useradd exited with status %s for %s",
                    result.returncode,
                    jupyter_username,
                )

    # Retrieve the UID/GID; this also detects a failed creation above.
    try:
        user_info = pwd.getpwnam(jupyter_username)
    except KeyError as err:
        raise ValueError(f"User {jupyter_username} not found in /etc/passwd du Hub") from err
    uid = user_info.pw_uid
    gid = user_info.pw_gid

    # Create the home if necessary, then copy the skeleton into it.
    if not os.path.exists(volume_path):
        os.makedirs(volume_path, mode=0o755)
        src_path = '/home_src/jovyan/'
        if os.path.exists(src_path):
            shutil.copytree(src_path, volume_path, dirs_exist_ok=True)

    # Ownership and permissions: everything belongs to the user with group
    # id 100; directories are 0o755 and files are 0o644.
    # NOTE(review): group id 100 is hard-coded while NB_GID below uses the
    # account's own gid — confirm this difference is intended.
    shared_gid = 100
    os.chown(volume_path, uid, shared_gid)
    os.chmod(volume_path, 0o755)
    for root, dirs, files in os.walk(volume_path):
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            os.chown(dir_path, uid, shared_gid)
            os.chmod(dir_path, 0o755)
        for file_name in files:
            file_path = os.path.join(root, file_name)
            os.chown(file_path, uid, shared_gid)
            os.chmod(file_path, 0o644)

    # Inject the identity into the container environment (values are strings).
    spawner.environment.update({
        'NB_USER': jupyter_username,
        'NB_UID': str(uid),
        'NB_GID': str(gid),
    })


# Run the hook before every spawn.
c.Spawner.pre_spawn_hook = pre_spawn_hook
def clean_dir_hook(spawner, home_root='/home'):
    """Remove the ``temp`` directory from the user's home directory.

    The home directory is ``<home_root>/jupyter-<name>``: the same
    ``jupyter-`` prefix that ``pre_spawn_hook`` and the NFS volume mapping use.
    Without the prefix the hook would look in a directory that is not the
    user's notebook home and never clean anything there.

    Args:
        spawner: the spawner that just stopped; only ``spawner.user.name`` is
            read.
        home_root: directory that contains the per-user homes. Defaults to
            ``/home``.

    Returns:
        None. Nothing happens when ``temp`` is missing or is not a directory.
    """
    jupyter_username = 'jupyter-' + spawner.user.name
    temp_path = os.path.join(home_root, jupyter_username, 'temp')
    # isdir() is False for missing paths and for plain files, so only a real
    # directory is ever removed.
    if os.path.isdir(temp_path):
        shutil.rmtree(temp_path)
# Run the cleanup hook after a single-user server has stopped.
c.Spawner.post_stop_hook = clean_dir_hook
# NFS mount: the per-user directory on the share is mapped to /home/jovyan.
# NOTE(review): '{username}' is presumably expanded per user by the spawner,
# and 'MY_NFS_SHARE' is a placeholder — verify both.
c.DockerSpawner.volumes = {
'MY_NFS_SHARE/home/jupyter-{username}': '/home/jovyan'
}
# Authorized images: display name -> image reference.
# NOTE(review): newer DockerSpawner releases appear to name this option
# `allowed_images` — confirm against the installed version.
c.DockerSpawner.image_whitelist = {
"base notebook": "jupyter/base-notebook:notebook-6.1.6",
"R and Spark": "quay.io/jupyter/all-spark-notebook",
}
# Idle culler: a Hub-managed service that runs the jupyterhub_idle_culler
# module with a timeout of 3600 (presumably seconds — confirm).
c.JupyterHub.services = [
{
"name": "jupyterhub-idle-culler-service",
"command": [
sys.executable,
"-m", "jupyterhub_idle_culler",
"--timeout=3600",
],
}
]
# Role that grants the culler service the scopes listed below.
c.JupyterHub.load_roles = [
{
"name": "jupyterhub-idle-culler-role",
"scopes": [
"list:users",
"read:users:activity",
"read:servers",
"delete:servers",
],
"services": ["jupyterhub-idle-culler-service"],
}
]
# Hub config: listen on all interfaces; other containers reach the Hub under
# the name 'hub'.
c.JupyterHub.hub_ip = '0.0.0.0'
c.JupyterHub.hub_connect_ip = 'hub'
# Network that the spawned services are attached to.
c.SwarmSpawner.network_name = 'jupyterhub-net'
# NOTE(review): confirm that SwarmSpawner honours extra_host_config; swarm
# services may ignore per-container host configuration.
c.SwarmSpawner.extra_host_config = {'network_mode': 'jupyterhub-net'}
# Command executed inside the single-user container.
c.Spawner.cmd = ['start-notebook.sh']
# CAS authentication: login endpoint, service URL handed to CAS, CA bundle
# file, and the ticket validation endpoint.
c.JupyterHub.authenticator_class = CASLocalAuthenticator
c.CASLocalAuthenticator.cas_login_url = 'https://My_CAS_URL/cas/'
c.CASLocalAuthenticator.cas_service_url = 'https://My_jupyter_URL/login'
c.CASLocalAuthenticator.cas_client_ca_certs = '/srv/jupyterhub/CAS.pem'
c.CASLocalAuthenticator.cas_service_validate_url = 'https://My_CAS_URL/cas/p3/serviceValidate'
Any help will be greatly appreciated.