Dockerspawner and volumes from host

I’ve struggled for a few days to get persistent data to work when using Dockerspawner and need some pointers on where I’m going wrong. I’m also using LocalAzureAdOAuthenticator, which I’ve got running.

tl;dr: using the config below I get Permission denied when trying to create a Notebook in my user container.

c = get_config()
import os
import shutil
import subprocess

from oauthenticator.azuread import LocalAzureAdOAuthenticator
from tornado.log import app_log
from traitlets import default

class MyAzureAdOAuthenticator(LocalAzureAdOAuthenticator):
    """LocalAzureAdOAuthenticator variant that logs in by username, not name.

    The username claim defaults to ``unique_name`` and usernames are reduced
    to the text in front of the first ``@``.
    """

    @default('username_claim')
    def _username_claim_default(self):
        # Default value for the ``username_claim`` trait.
        claim = 'unique_name'
        return claim

    def normalize_username(self, username):
        """
        Return the part of ``username`` that precedes the first ``@``.

        This override does not lowercase the username; we prefer using the
        standard of sgxx.
        """
        local_part, _sep, _domain = username.partition("@")
        return local_part


# Logging: DEBUG level, timestamps formatted as "YYYY-MM-DD HH:MM:SS".
c.Application.log_level = "DEBUG"
c.Application.log_datefmt = "%Y-%m-%d %H:%M:%S"

# Spawn a new docker for each user
NOTEBOOK_DIR = "/home/jovyan/work"        # notebook directory inside the user container
HOST_NOTEBOOK_PATH = "/home/{username}"   # per-user source path for the volume below

c.JupyterHub.spawner_class = "docker"
# Image and network names are required environment variables (KeyError if unset).
c.DockerSpawner.image = os.environ["DOCKER_JUPYTER_IMAGE"]
c.DockerSpawner.network_name = os.environ["DOCKER_NETWORK_NAME"]
c.DockerSpawner.notebook_dir = NOTEBOOK_DIR
c.DockerSpawner.remove = True

# Bind HOST_NOTEBOOK_PATH onto NOTEBOOK_DIR in read-write mode.
c.DockerSpawner.volumes = {
    HOST_NOTEBOOK_PATH: dict(bind=NOTEBOOK_DIR, mode="rw"),
}

c.JupyterHub.hub_ip = os.environ["HUB_IP"]
c.Spawner.default_url = "/lab"

# Single hub service, "cull_idle", which runs cull_idle_servers.py with
# --timeout=600 and is registered with admin=True.
c.JupyterHub.services = [
    dict(
        name="cull_idle",
        admin=True,
        command=[
            "python3",
            "/srv/jupyterhub/cull_idle_servers.py",
            "--timeout=600",
        ],
    ),
]

# Login using AAD
c.JupyterHub.authenticator_class = MyAzureAdOAuthenticator
c.AzureAdOAuthenticator.oauth_callback_url = "http://localhost/hub/oauth_callback"

# Tenant and OAuth client credentials are read from the environment;
# each value is None when its variable is not set.
c.AzureAdOAuthenticator.tenant_id = os.getenv("AAD_TENANT_ID")
c.AzureAdOAuthenticator.client_id = os.getenv("OAUTH_CLIENT_ID")
c.AzureAdOAuthenticator.client_secret = os.getenv("OAUTH_CLIENT_SECRET")

# specify users and admin
c.Authenticator.admin_users = {"sgxx"}
c.MyAzureAdOAuthenticator.create_system_users = True

This configuration starts up the Jupyterhub. Entering into the container, I’m able to list the /home-directory:

root@6d627b13627c:/home# pwd
/home
root@6d627b13627c:/home# ls -lashtr
total 12K
4.0K drwxr-xr-x 1 root root 4.0K Dec  3 14:22 ..
4.0K drwxr-xr-x 1 root root 4.0K Dec  3 14:22 .
4.0K drwxr-xr-x 2 sgxx sgxx 4.0K Dec  3 14:22 sgxx
root@6d627b13627c:/home# 

After authenticating using Azure AD, a container for my user is spawned and I’m presented with the Jupyterlab-environment in my browser. Trying to create a Notebook, however, raises Permission denied: Untitled.ipynb followed by Cannot read property 'path' of undefined.

Looking into the user container, I see this. Noteworthy is that the work folder is owned by root and not jovyan:users.

(base) jovyan@5020eb70d918:~$ pwd
/home/jovyan
(base) jovyan@5020eb70d918:~$ ls -lashtr
total 68K
4.0K -rw-rw-r-- 1 jovyan users  807 Feb 25  2020 .profile
4.0K -rw-rw-r-- 1 jovyan users  220 Feb 25  2020 .bash_logout
4.0K -rw-rw-r-- 1 jovyan users 3.8K Nov  8 16:20 .bashrc
8.0K drwxr-xr-x 1 root   root  4.0K Nov  8 16:20 ..
4.0K drwsrwsr-x 1 jovyan users 4.0K Nov  8 16:21 .conda
4.0K drwsrwsr-x 2 jovyan users 4.0K Nov  8 16:35 .empty
4.0K drwsrwsr-x 3 jovyan users 4.0K Nov  8 16:36 .yarn
8.0K drwsrws--- 1 jovyan users 4.0K Nov  8 16:38 .config
4.0K drwxr-xr-x 3 root   root  4.0K Dec  3 12:49 work
4.0K drwxr-sr-x 3 jovyan users 4.0K Dec  3 14:32 .local
4.0K drwsrwsr-x 1 jovyan users 4.0K Dec  3 14:32 .cache
4.0K drwsrws--- 1 jovyan users 4.0K Dec  3 14:32 .jupyter
4.0K drwxr-sr-x 3 jovyan users 4.0K Dec  3 14:32 .npm
8.0K drwsrwsr-x 1 jovyan users 4.0K Dec  3 14:32 .
(base) jovyan@5020eb70d918:~$ 

Finally, all of this is controlled through docker-compose:

# Compose file defining three services: the JupyterHub hub (`jupyterhub`),
# the single-user image (`jupyterlab`) and a Traefik reverse proxy.
version: "3.1"

services:
  jupyterhub:
    build:
      context: ./jupyterhub
      dockerfile: Dockerfile
    container_name: jupyterhub_hub # The service will use this container name.
    restart: unless-stopped
    volumes: # Give access to Docker socket.
      # Bind Docker socket on the host so we can connect to the daemon from
      # within the container
      - "/var/run/docker.sock:/var/run/docker.sock:rw"
    environment: # Env variables passed to the Hub process.
      # Same image name that the `jupyterlab` service below builds.
      DOCKER_JUPYTER_IMAGE: jupyterlab_img
      DOCKER_NETWORK_NAME: ${COMPOSE_PROJECT_NAME}_default
      # Same value as `container_name` above.
      HUB_IP: jupyterhub_hub
      # The remaining values are substituted from variables of the same name.
      AAD_TENANT_ID: "${AAD_TENANT_ID}"
      OAUTH_CLIENT_ID: "${OAUTH_CLIENT_ID}"
      OAUTH_CLIENT_SECRET: "${OAUTH_CLIENT_SECRET}"
      AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
      AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
      AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION}"
      AWS_JUPYTER_BUCKET: "${AWS_JUPYTER_BUCKET}"
    labels: # Traefik configuration.
      - "traefik.enable=true"
      - "traefik.frontend.rule=Host:localhost"
    # NOTE(review): `&>>` is shell redirection syntax. Compose does not
    # necessarily run `command` through a shell, in which case the redirection
    # and the log path would reach jupyterhub as literal arguments - confirm.
    command: jupyterhub -f /srv/jupyterhub/jupyterhub_config.py &>> '/var/log/jupyterhub.log'

  # Configuration for the single-user servers
  # Builds ./jupyterlab and tags the result `jupyterlab_img`; the service's
  # own command is just `echo`.
  jupyterlab:
    build: jupyterlab
    image: jupyterlab_img
    command: echo

  # Traefik v1.7 reverse proxy publishing ports 80 and 8080.
  reverse-proxy:
    restart: unless-stopped
    image: traefik:v1.7.16
    ports:
      - "80:80"
      - "8080:8080"
    volumes:
      # Traefik configuration file, plus the Docker socket.
      - ./reverse-proxy/traefik.toml:/etc/traefik/traefik.toml
      - /var/run/docker.sock:/var/run/docker.sock

The Dockerfile for Jupyterhub extends jupyterhub/jupyterhub:1.2 and does nothing fancy really. Just installs a few requirements using apt-get and conda.

The Dockerfile for jupyterlab extends jupyter/scipy-notebook:42f4c82a07ff and also installs some requirements using apt-get and conda. The final lines in that file are:

USER $NB_UID
WORKDIR $HOME
1 Like

@shardo: I’ve read your posts here and got the feeling you have been struggling with similar issues? How did you end up solving it?

Hi ggravlingen,

Is it some problem between the uid on the host vs the container? jovyan runs with uid 1000.

Thanks,
Mark

1 Like

Not sure what’s going on here. :slight_smile: Below is the Dockerfile for my jupyterlab, and my understanding is that the line USER $NB_UID makes me become jovyan in the container? The commands below at least seem to suggest this is the case:

(base) jovyan@64fbab96975c:~$ pwd
/home/jovyan
(base) jovyan@64fbab96975c:~$ whoami
jovyan
(base) jovyan@64fbab96975c:~$ 

Dockerfile for jupyterlab:

# Single-user JupyterLab image built on a fixed jupyter/scipy-notebook tag.
FROM jupyter/scipy-notebook:42f4c82a07ff

# Run the system package installation below as root.
USER root

# Install system packages (s3cmd) and remove the apt package lists afterwards
RUN apt-get update && apt-get install -y --no-install-recommends \
      s3cmd && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get autoremove

# Switch to the user identified by $NB_UID and work from $HOME.
USER $NB_UID
WORKDIR $HOME

# Copy the conda requirements list into the image
COPY requirements.txt /tmp/

# Install the packages listed in requirements.txt, then run `conda clean`.
RUN conda install --quiet --yes \
    --file /tmp/requirements.txt \
    && conda clean --all -f -y

Yes, sorry I wasn’t clear. Your bind mount is coming up as owned by root (uid 0) on the guest, but it needs to be owned by jovyan (uid 1000). I’m wondering if there’s a problem with the host where it’s not translating the uid of the user’s home directory correctly to the guest.

Thanks! I made some changes to the spawner config to try to facilitate this:

# Spawn a new docker for each user
NOTEBOOK_DIR = "/home/jovyan/work"
HOST_HOME_PATH = "/home"
HOST_USER_PATH = f"{HOST_HOME_PATH}/{{username}}"  # "/home/{username}"
HOST_NOTEBOOK_PATH = HOST_USER_PATH + "/work"      # "/home/{username}/work"

c.JupyterHub.spawner_class = "docker"
c.DockerSpawner.image = os.environ["DOCKER_JUPYTER_IMAGE"]
c.DockerSpawner.network_name = os.environ["DOCKER_NETWORK_NAME"]
c.DockerSpawner.notebook_dir = NOTEBOOK_DIR
c.DockerSpawner.remove = True

# Create the container with user "root" and hand it the CHOWN_* / NB_*
# environment variables listed below.
c.DockerSpawner.extra_create_kwargs = dict(user="root")
c.DockerSpawner.environment = dict(
    CHOWN_HOME="yes",
    CHOWN_EXTRA="/home/jovyan",
    CHOWN_HOME_OPTS="-R",
    NB_UID=1000,
    NB_GID=1000,
)

# Map the per-user work folder onto the notebook directory.
c.DockerSpawner.volumes = {HOST_NOTEBOOK_PATH: NOTEBOOK_DIR}

Now, the work folder is owned by jovyan and I can create files in the lab. A good start! However, I would expect any created file to show up in the host folder that I’ve mapped to the spawned container’s work folder. Currently, this does not happen. Am I mistaken that files should be stored in the mapped host folder?

1 Like

I’m a bit ashamed to say that I misunderstood what the host was here. I thought it was the jupyterhub-container but, of course, it’s the VM where I’m running docker. All works as expected now. :slight_smile: