I am trying to configure my kernel.json so that I can run Spark kernels in both YARN client and cluster modes inside HPE Data Fabric. Could anyone please help with the right configurations?
This is the kernel configuration I am currently using:
{
  "language": "python",
  "display_name": "Spark - Python (YARN Cluster Mode)",
  "metadata": {
    "process_proxy": {
      "class_name": "enterprise_gateway.services.processproxies.yarn.YarnClusterProcessProxy"
    }
  },
  "env": {
    "SPARK_HOME": "/opt/mapr/spark/spark-3.2.0",
    "PYSPARK_PYTHON": "/opt/anaconda3/envs/spark_py38/bin/python",
    "PYTHONPATH": "/opt/anaconda3/envs/spark_py38/lib/python3.8/site-packages/:/opt/mapr/spark/spark-3.2.0/python:/opt/mapr/spark/spark-3.2.0/python/lib/py4j-0.10.9.2-src.zip",
    "SPARK_OPTS": "--master yarn --deploy-mode cluster --name ${KERNEL_ID:-ERROR__NO__KERNEL_ID} --conf spark.yarn.submit.waitAppCompletion=false --conf spark.yarn.appMasterEnv.PYTHONUSERBASE=/home/${KERNEL_USERNAME}/.local --conf spark.yarn.appMasterEnv.PYTHONPATH=/opt/anaconda3/envs/spark_py38/lib/python3.8/site-packages/:/opt/mapr/spark/spark-3.2.0/python:/opt/mapr/spark/spark-3.2.0/python/lib/py4j-0.10.9.2-src.zip --conf spark.yarn.appMasterEnv.PATH=/opt/anaconda3/envs/spark_py38/bin:$PATH ${KERNEL_EXTRA_SPARK_OPTS}",
    "LAUNCH_OPTS": ""
  },
  "argv": [
    "/opt/anaconda3/envs/spark_py38/share/jupyter/kernels/spark_python_yarn_cluster/bin/run.sh",
    "--RemoteProcessProxy.kernel-id",
    "{kernel_id}",
    "--RemoteProcessProxy.response-address",
    "{response_address}",
    "--RemoteProcessProxy.port-range",
    "{port_range}",
    "--RemoteProcessProxy.spark-context-initialization-mode",
    "lazy"
  ]
}
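
For the client-mode side, my understanding from the sample kernelspecs shipped with Enterprise Gateway is that I would need a second kernelspec that uses the DistributedProcessProxy and --deploy-mode client, since in client mode the driver runs where the gateway runs and the spark.yarn.appMasterEnv settings would no longer apply. Below is my rough sketch of that variant (the spark_python_yarn_client directory name and the run.sh path are just my guesses, not something I have verified). Is this the right direction, or am I missing something?

{
  "language": "python",
  "display_name": "Spark - Python (YARN Client Mode)",
  "metadata": {
    "process_proxy": {
      "class_name": "enterprise_gateway.services.processproxies.distributed.DistributedProcessProxy"
    }
  },
  "env": {
    "SPARK_HOME": "/opt/mapr/spark/spark-3.2.0",
    "PYSPARK_PYTHON": "/opt/anaconda3/envs/spark_py38/bin/python",
    "PYTHONPATH": "/opt/anaconda3/envs/spark_py38/lib/python3.8/site-packages/:/opt/mapr/spark/spark-3.2.0/python:/opt/mapr/spark/spark-3.2.0/python/lib/py4j-0.10.9.2-src.zip",
    "SPARK_OPTS": "--master yarn --deploy-mode client --name ${KERNEL_ID:-ERROR__NO__KERNEL_ID} ${KERNEL_EXTRA_SPARK_OPTS}",
    "LAUNCH_OPTS": ""
  },
  "argv": [
    "/opt/anaconda3/envs/spark_py38/share/jupyter/kernels/spark_python_yarn_client/bin/run.sh",
    "--RemoteProcessProxy.kernel-id",
    "{kernel_id}",
    "--RemoteProcessProxy.response-address",
    "{response_address}",
    "--RemoteProcessProxy.port-range",
    "{port_range}",
    "--RemoteProcessProxy.spark-context-initialization-mode",
    "lazy"
  ]
}

In particular, I am not sure whether a single kernel.json can cover both deploy modes, or whether two separate kernelspecs (one per mode) is the expected setup on HPE Data Fabric.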