Hello,
I need to be able to change the sbatch command depending on whether we are on a partition with GPUs or not.
I am using OptionsFormSpawner to offer my users a form in which they choose the partition, the Singularity image, and the number of CPUs or GPUs.
I tried the following code:
# Submit command written as a shell conditional: the three GPU partitions get
# the --gres options, every other partition gets a plain `sbatch --parsable`.
# {partition}, {username} and {gpu_number} are template placeholders that are
# substituted before the command is run.
c.BatchSpawnerBase.batch_submit_cmd = """
if [ "{partition}" = "GPUNodes" -o "{partition}" = "RTX6000Node" -o "{partition}" = "GPUNodes1080-dev" ]
then
sudo -E -u {username} sbatch --parsable --gres-flags=enforce-binding --gres=gpu:{gpu_number}
else
sudo -E -u {username} sbatch --parsable
fi
"""
But it is not working.
If I choose the GPUNodes partition in my form, it never enters the condition, and my nvidia-smi
command does not return any GPU.
If I put "sudo -E -u {username} sbatch --parsable --gres-flags=enforce-binding --gres=gpu:{gpu_number}"
in my else statement, it works correctly.
Log:
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: [I 2024-04-04 10:59:27.515 JupyterHub batchspawner:282] Spawner submitting command: if [ "GPUNodes1080-dev" = "GPUNodes" -o "GPUNodes1080-dev" = "RTX6000Node" -o "GPUNodes1080-dev" = "GPUNodes1
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: 080-dev" ]
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: then
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: sudo -E -u wvincent sbatch --parsable --gres-flags=enforce-binding --gres=gpu:1
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: else
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: sudo -E -u wvincent sbatch --parsable
Apr 04 10:59:27 r9jupyter jupyterhub[78415]: fi
Apr 04 10:59:27 r9jupyter sudo[78446]: root : PWD=/etc/jupyterhub ; USER=wvincent ; COMMAND=/bin/sbatch --parsable
This used to work in JupyterHub 1.x.
Package versions:
batchspawner 1.3.1.dev0 /local/batchspawner /usr/local/lib/python3.9/site-packages pip
jupyterhub 4.0.2 /usr/local/lib/python3.9/site-packages pip
optionsspawner 0.1.0 /usr/local/lib/python3.9/site-packages pip
wrapspawner 1.0.2.dev0 /usr/local/lib/python3.9/site-packages pip
My complete config:
import os
import importlib.machinery
import batchspawner
# `get_config` is neither defined nor imported in this file; presumably it is
# injected by the JupyterHub config loader -- TODO confirm.
c = get_config() #noqa
#======================================================
# GENERAL
#======================================================
# Basic configuration
c.Authenticator.admin_users = {'xxx','xxx'}
c.JupyterHub.hub_ip = 'r9jupyter.domain.fr'
c.JupyterHub.bind_url = 'http://0.0.0.0:8000'
# Reset the sessions on every restart
c.JupyterHub.reset_db = True
#======================================================
# FORM
#======================================================
from optionsspawner.forms import (
FormField,
TextInputField,
NumericalInputField,
CheckboxInputField,
SelectField,
)
# Partition selector: every choice uses the partition name both as the
# submitted value and as the label shown in the form.
partition_names = (
    'GPUNodes',
    'RTX6000Node',
    '24CPUNodes',
    '48CPUNodes',
    'GPUNodes1080-dev',
)
partition_select = SelectField(
    'req_partition',
    label='Select a partition',
    attr_required=True,
    choices=[(name, name) for name in partition_names],
    default='GPUNodes1080-dev',
)

# Wall-clock limit of the job; the pattern restricts the input to HH:MM:SS
# with the hours between 00 and 19.
runtime_input = TextInputField(
    'req_runtime',
    label='Specify runtime (HH:MM:SS format, 19hr max)',
    attr_required=True,
    attr_value='01:00:00',
    attr_pattern="[01]{1}[0-9]{1}:[0-5]{1}[0-9]{1}:[0-5]{1}[0-9]{1}",
)

# Number of GPUs to request (1 or 2).
gpu_number_input = NumericalInputField(
    'req_gpu_number',
    label='Specify number of GPUs (2 max per server), only for GPUNodes or RTX6000Node partition',
    attr_required=True,
    attr_value=1,
    attr_min=1,
    attr_max=2,
)

# Number of CPUs per task (1 to 64, 4 by default).
cpu_task_input = NumericalInputField(
    'req_cpu_task',
    label='Specify number of CPUs per task',
    attr_required=True,
    attr_value=4,
    attr_min=1,
    attr_max=64,
)
#======================================================
def list_sif_images(directory, label):
    """Return the form choices for the Singularity images found in *directory*.

    Args:
        directory: Path of the directory to scan (not recursive).
        label: Prefix placed before the file name in the choice label.

    Returns:
        A list of ``(full_path, "<label> <file name>")`` tuples, one for each
        entry whose name ends with ``.sif``, sorted by file name so the
        drop-down order is stable across restarts. The list is empty, and a
        warning is issued, when the directory cannot be read.
    """
    import warnings  # local import: not part of the file-level imports

    try:
        entries = sorted(os.listdir(directory))
    except OSError as err:
        # An unreadable image directory must not abort the whole hub
        # configuration; users can still type the path of their own image.
        warnings.warn(f"cannot list Singularity images in {directory}: {err}")
        return []
    return [
        (os.path.join(directory, name), f"{label} {name}")
        for name in entries
        if name.endswith(".sif")
    ]


# CUDA 11 images are currently disabled; to re-enable them:
#cuda_11 = "/apps/containerCollections/CUDA11"
#list_container_cuda11 = list_sif_images(cuda_11, "CUDA-11")

# Every ".sif" file of the <cuda_12> directory becomes a form choice.
cuda_12 = "/apps/containerCollections/CUDA12"
list_container_cuda12 = list_sif_images(cuda_12, "CUDA-12")
#======================================================
# Images offered in the drop-down: the discovered CUDA 12 images followed by
# the "autre" entry, which stands for "use the free-text path instead".
#image_choices = list_container_cuda11 + list_container_cuda12 + [("autre", "autre")]
image_choices = list_container_cuda12 + [("autre", "autre")]
image_select = SelectField(
    'req_image_path',
    label='Select a singularity image in the list',
    attr_required=True,
    choices=image_choices,
    default='autre',
)

# Free-text path of a user-provided image.
image_input = TextInputField(
    'req_specificimage_path',
    label="or specify your own image (which MUST contains jupyterhub and jupyterlab packages)",
    attr_placeholder='Path to your singularity image on OSIRIM (/users/.../your-image.sif or /projets/.../your-image.sif)',
)

# The fields, in the order in which they are listed for the form.
form_fields = [
    image_select,
    image_input,
    runtime_input,
    partition_select,
    gpu_number_input,
    cpu_task_input,
]
#======================================================
# SPAWNER
#======================================================
# Batch script template. The {runtime}, {homedir}, {keepvars}, {cpu_task} and
# {partition} placeholders fill in the #SBATCH options. The container is
# {specificimage_path} when the selected image is "autre" and {image_path}
# otherwise; {cmd} is then run inside it through `singularity exec`, with $PWD
# bound to /run/user.
c.BatchSpawnerBase.batch_script = '''#!/bin/bash
#SBATCH --time={runtime}
#SBATCH --output={homedir}/occidata-jupyter-%j.log
#SBATCH --error={homedir}/occidata-jupyter-%j.error
#SBATCH --job-name=jupyterlab
#SBATCH --export={keepvars}
#SBATCH --cpus-per-task={cpu_task}
#SBATCH --partition={partition}
#SBATCH --chdir={homedir}
#SBATCH --get-user-env=L
#SBATCH --ntasks=1
set -x
trap 'echo SIGTERM received' TERM
{prologue}
if [ "{image_path}" = "autre" ]
then
export SINGULARITYENV_CONTAINER_PATH={specificimage_path}
else
export SINGULARITYENV_CONTAINER_PATH={image_path}
fi
singularity exec --bind $PWD:/run/user $SINGULARITYENV_CONTAINER_PATH {cmd}
echo "jupyterhub-singleuser ended gracefully"
{epilogue}
'''
# Unconditional GPU variant of the submit command, kept for reference:
#sudo -E -u {username} sbatch --parsable --gres-flags=enforce-binding --gres=gpu:{gpu_number}
# Clear the exec prefix: sudo is issued explicitly in each branch below.
c.BatchSpawnerBase.exec_prefix = ""
# Request GPUs only when the selected partition is one of the GPU partitions.
# A `case` statement is used instead of `[ ... -o ... ]`: the previous test had
# a line break inside the "GPUNodes1080-dev" literal (so that comparison could
# never match) and no space before the closing `]` (so `[` failed and the
# `else` branch was always taken).
# NOTE(review): keep literal braces (e.g. ${VAR}) out of this string; they
# would presumably clash with the {placeholder} substitution -- confirm.
c.BatchSpawnerBase.batch_submit_cmd = """case "{partition}" in
    GPUNodes|RTX6000Node|GPUNodes1080-dev)
        sudo -E -u {username} sbatch --parsable --gres-flags=enforce-binding --gres=gpu:{gpu_number}
        ;;
    *)
        sudo -E -u {username} sbatch --parsable
        ;;
esac"""
# Select the form mode
c.JupyterHub.spawner_class = 'optionsspawner.OptionsFormSpawner'
# Define the spawner, details in https://github.com/jupyterhub/batchspawner/blob/fe5a893eaf9eb5e121cbe36bad2e69af798e6140/batchspawner/batchspawner.py
c.OptionsFormSpawner.child_class = 'batchspawner.SlurmSpawner'
# Give it the previously defined fields
c.OptionsFormSpawner.form_fields = form_fields
# BatchSpawner config
c.BatchSpawnerBase.req_host = 'r9jupyter.domain.fr'
c.BatchSpawnerBase.req_runtime = '12:00:00'
c.BatchSpawnerBase.req_nprocs = '2'
c.BatchSpawnerBase.req_queue = 'r9jupyter'
# Spawner config
c.Spawner.default_url = '/lab'
c.Spawner.notebook_dir = '~'
c.Spawner.start_timeout = 300
c.Spawner.http_timeout = 300
Thank you in advance for your help.
Sorry for the duplicate of my post on the GitHub repo.
Best regards,