Hello!
I’m using Helm and following the Zero to JupyterHub guide. Unfortunately, the singleuser pods are stuck in Pending state indefinitely.
Originally I thought it was an issue with dynamic storage provisioning, because the PVC for the pod was also stuck in Pending. I have since resolved that by manually creating a new StorageClass and specifying it in `singleuser.storage.dynamic.storageClass` (h/t to https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/1413 for helping me fix that one).
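For reference, the StorageClass I created looks roughly like the sketch below. The provisioner and parameters are illustrative for an EBS-backed cluster rather than my exact manifest, but the name matches what I set in `singleuser.storage.dynamic.storageClass`:

```bash
# Roughly how I created the StorageClass; the provisioner/parameters here are
# illustrative for an EBS-backed cluster, not necessarily the exact ones I used.
kubectl apply -f - <<'EOF'
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: jhub-singleuser
provisioner: kubernetes.io/aws-ebs
parameters:
  type: gp2
reclaimPolicy: Delete
volumeBindingMode: Immediate
EOF
```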
Now the PVC created for the singleuser pod is bound correctly, but the pod itself ends up stuck in Pending. I have run `kubectl get events -n jhub` and have not been able to identify any obvious issues.
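In case it helps, the checks I have been running look roughly like this (the pod name is just an example; the chart names single-user pods `jupyter-<username>`):

```bash
# Confirm the PVC is Bound and look at why the scheduler is leaving the pod Pending.
kubectl get pvc -n jhub
kubectl describe pod jupyter-dmerrick -n jhub   # the Events section at the bottom
kubectl get events -n jhub --sort-by='.lastTimestamp'
```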
The install command I am using is:
```bash
#!/usr/bin/env bash
helm upgrade \
  --install jhub jupyterhub/jupyterhub \
  --namespace jhub \
  --create-namespace \
  --version=0.9.0 \
  --debug \
  --values config.yml
```
I have also tried 0.9.1 and 0.9.2 and hit the same issue. My config file looks like this (it is long because I generated it with `helm show values`):
```yaml
custom: {}

hub:
  service:
    type: ClusterIP
    annotations: {}
    ports:
      nodePort:
    loadBalancerIP:
  baseUrl: /
  cookieSecret:
  publicURL:
  initContainers: []
  fsGid: 1000
  nodeSelector: {}
  concurrentSpawnLimit: 64
  consecutiveFailureLimit: 5
  activeServerLimit:
  deploymentStrategy:
    ## type: Recreate
    ## - sqlite-pvc backed hubs require the Recreate deployment strategy as a
    ##   typical PVC storage can only be bound to one pod at the time.
    ## - JupyterHub isn't designed to support being run in parallell. More work
    ##   needs to be done in JupyterHub itself for a fully highly available (HA)
    ##   deployment of JupyterHub on k8s is to be possible.
    type: Recreate
  db:
    type: sqlite-pvc
    upgrade:
    pvc:
      annotations: {}
      selector: {}
      accessModes:
        - ReadWriteOnce
      storage: 1Gi
      subPath:
      storageClassName:
    url:
    password:
  labels: {}
  annotations: {}
  extraConfig: {}
  extraConfigMap: {}
  extraEnv: {}
  extraContainers: []
  extraVolumes: []
  extraVolumeMounts: []
  image:
    name: jupyterhub/k8s-hub
    tag: '0.9.0'
    pullSecrets:
  resources:
    requests:
      cpu: 200m
      memory: 512Mi
  containerSecurityContext:
    runAsUser: 1000
    allowPrivilegeEscalation: false
  services: {}
  imagePullSecret:
    enabled: false
    registry:
    username:
    email:
    password:
  pdb:
    enabled: true
    minAvailable: 1
  networkPolicy:
    enabled: false
    ingress: []
    ## egress for JupyterHub already includes Kubernetes internal DNS and
    ## access to the proxy, but can be restricted further, but ensure to allow
    ## access to the Kubernetes API server that couldn't be pinned ahead of
    ## time.
    ##
    ## ref: https://stackoverflow.com/a/59016417/2220152
    egress:
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
  allowNamedServers: false
  namedServerLimitPerUser:
  authenticatePrometheus:
  redirectToServer:
  shutdownOnLogout:
  templatePaths: []
  templateVars: {}
  livenessProbe:
    enabled: false
    initialDelaySeconds: 60
    periodSeconds: 10
    failureThreshold: 3
    timeoutSeconds: 1
  readinessProbe:
    enabled: true
    initialDelaySeconds: 0
    periodSeconds: 2
    failureThreshold: 3
    timeoutSeconds: 1
  # existingSecret: existing-secret

rbac:
  enabled: true

proxy:
  secretToken: 'REDACTED'
  deploymentStrategy:
    ## type: Recreate
    ## - JupyterHub's interaction with the CHP proxy becomes a lot more robust
    ##   with this configuration. To understand this, consider that JupyterHub
    ##   during startup will interact a lot with the k8s service to reach a
    ##   ready proxy pod. If the hub pod during a helm upgrade is restarting
    ##   directly while the proxy pod is making a rolling upgrade, the hub pod
    ##   could end up running a sequence of interactions with the old proxy pod
    ##   and finishing up the sequence of interactions with the new proxy pod.
    ##   As CHP proxy pods carry individual state this is very error prone. One
    ##   outcome when not using Recreate as a strategy has been that user pods
    ##   have been deleted by the hub pod because it considered them unreachable
    ##   as it only configured the old proxy pod but not the new before trying
    ##   to reach them.
    type: Recreate
    ## rollingUpdate:
    ## - WARNING:
    ##   This is required to be set explicitly blank! Without it being
    ##   explicitly blank, k8s will let eventual old values under rollingUpdate
    ##   remain and then the Deployment becomes invalid and a helm upgrade would
    ##   fail with an error like this:
    ##
    ##     UPGRADE FAILED
    ##     Error: Deployment.apps "proxy" is invalid: spec.strategy.rollingUpdate: Forbidden: may not be specified when strategy `type` is 'Recreate'
    ##     Error: UPGRADE FAILED: Deployment.apps "proxy" is invalid: spec.strategy.rollingUpdate: Forbidden: may not be specified when strategy `type` is 'Recreate'
    rollingUpdate:
  containerSecurityContext:
    allowPrivilegeEscalation: false
  service:
    type: LoadBalancer
    labels: {}
    annotations: {}
    nodePorts:
      http:
      https:
    loadBalancerIP:
    loadBalancerSourceRanges: []
  chp:
    image:
      name: jupyterhub/configurable-http-proxy
      tag: 4.2.1
    livenessProbe:
      enabled: true
      initialDelaySeconds: 60
      periodSeconds: 10
    readinessProbe:
      enabled: true
      initialDelaySeconds: 0
      periodSeconds: 2
    resources:
      requests:
        cpu: 200m
        memory: 512Mi
    extraEnv: {}
  traefik:
    image:
      name: traefik
      tag: v2.3 # ref: https://hub.docker.com/_/traefik?tab=tags
    hsts:
      includeSubdomains: false
      preload: false
      maxAge: 15724800 # About 6 months
    resources: {}
    extraEnv: {}
    extraVolumes: []
    extraVolumeMounts: []
    extraStaticConfig: {}
    extraDynamicConfig: {}
    containerSecurityContext:
      allowPrivilegeEscalation: false
  secretSync:
    image:
      name: jupyterhub/k8s-secret-sync
      tag: '0.9.0'
    resources: {}
  labels: {}
  nodeSelector: {}
  pdb:
    enabled: true
    minAvailable: 1
  https:
    enabled: true
    #type: letsencrypt, manual, offload, secret
    type: offload
  service:
    annotations:
      external-dns.alpha.kubernetes.io/hostname: REDACTED
      service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:REDACTED
      service.beta.kubernetes.io/aws-load-balancer-internal: "true"
      # The protocol to use on the backend, we use TCP since we're using websockets
      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp"
      # Which ports should use SSL
      service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https"
      service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
  networkPolicy:
    enabled: false
    ingress: []
    egress:
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0

auth:
  type: github
  github:
    clientId: "REDACTED"
    clientSecret: "REDACTED"
    callbackUrl: "REDACTED/hub/oauth_callback"
    orgWhitelist:
      - "REDACTED"
  scopes:
    - "read:org"
  whitelist:
    users:
  admin:
    access: true
    users:
      - dmerrick
  dummy:
    password:
  ldap:
    dn:
      search: {}
      user: {}
    user: {}
  state:
    enabled: false
    cryptoKey:

singleuser:
  extraTolerations: []
  nodeSelector: {}
  extraNodeAffinity:
    required: []
    preferred: []
  extraPodAffinity:
    required: []
    preferred: []
  extraPodAntiAffinity:
    required: []
    preferred: []
  networkTools:
    image:
      name: jupyterhub/k8s-network-tools
      tag: '0.9.0'
  cloudMetadata:
    enabled: true
    ip: 169.254.169.254
  networkPolicy:
    enabled: false
    ingress: []
    egress:
      # Required egress is handled by other rules so it's safe to modify this
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
              except:
                - 169.254.169.254/32
  events: true
  extraAnnotations: {}
  extraLabels:
    hub.jupyter.org/network-access-hub: 'true'
  extraEnv: {}
  lifecycleHooks: {}
  initContainers: []
  extraContainers: []
  uid: 1000
  fsGid: 100
  serviceAccountName:
  storage:
    type: dynamic
    extraLabels: {}
    extraVolumes: []
    extraVolumeMounts: []
    static:
      pvcName:
      subPath: '{username}'
    capacity: 10Gi
    homeMountPath: /home/jovyan
    dynamic:
      storageClass: jhub-singleuser
      pvcNameTemplate: claim-{username}{servername}
      volumeNameTemplate: volume-{username}{servername}
      storageAccessModes: [ReadWriteOnce]
  image:
    name: jupyter/base-notebook
    tag: 'latest'
    pullPolicy: Always
    # pullSecrets:
    #   - secretName
  imagePullSecret:
    enabled: false
    registry:
    username:
    email:
    password:
  startTimeout: 600
  cpu:
    limit:
    guarantee:
  memory:
    limit:
    guarantee: 1G
  extraResource:
    limits: {}
    guarantees: {}
  cmd: jupyterhub-singleuser
  defaultUrl:
  extraPodConfig: {}

scheduling:
  userScheduler:
    enabled: true
    replicas: 2
    logLevel: 4
    plugins:
      score:
        disabled:
          - name: ImageLocality
          - name: NodeResourcesLeastAllocated
          - name: NodeResourcesBalancedAllocation
          - name: SelectorSpread
        enabled:
          - name: NodeResourcesMostAllocated
    image:
      name: k8s.gcr.io/kube-scheduler
      tag: v1.19.1
    nodeSelector: {}
    pdb:
      enabled: true
      minAvailable: 1
    resources:
      requests:
        cpu: 50m
        memory: 256Mi
  podPriority:
    enabled: false
    globalDefault: false
    defaultPriority: 0
    userPlaceholderPriority: -10
  userPlaceholder:
    enabled: true
    replicas: 0
  corePods:
    nodeAffinity:
      matchNodePurpose: prefer
  userPods:
    nodeAffinity:
      matchNodePurpose: prefer

prePuller:
  annotations: {}
  resources:
    requests:
      cpu: 0
      memory: 0
  hook:
    enabled: true
    image:
      name: jupyterhub/k8s-image-awaiter
      tag: '0.9.0'
    podSchedulingWaitDuration: -1
  continuous:
    enabled: true
  extraImages: {}
  pause:
    image:
      name: k8s.gcr.io/pause
      # Pick version from https://console.cloud.google.com/gcr/images/google-containers/GLOBAL/pause?gcrImageListsize=30
      tag: '3.2'

ingress:
  enabled: false
  annotations: {}
  hosts: []
  pathSuffix: ''
  tls: []

cull:
  enabled: true
  users: false
  removeNamedServers: false
  timeout: 3600
  every: 600
  concurrency: 10
  maxAge: 0

debug:
  enabled: false
```