Skip to content

Commit 86657ee

Browse files
authored
Merge pull request #45 from xuwenyihust/44-avoid-variable-hardcoding-in-notebook-startuppy
Update Spark configuration and Docker image version
2 parents 61f8378 + 437806d commit 86657ee

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

docker/jupyter-notebook/Dockerfile.notebook

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ ENV JUPYTER_CONFIG_DIR /home/jovyan/.jupyter/
5050
# Add the JUPYTER_CONFIG_DIR to the PYTHONPATH
5151
ENV PYTHONPATH "${PYTHONPATH}:${JUPYTER_CONFIG_DIR}"
5252

53+
ENV HOME_DIR="/home/jovyan"
5354
ENV BUCKET_NAME="data-platform-bucket-20231126"
55+
ENV NAMESPACE="spark-dev"
56+
ENV SERVICE_ACCOUNT="spark"
57+
ENV EXECUTOR_IMAGE="wenyixu101/spark:3.5.0-python3.11"
58+
ENV WEBUI_SERVICE_NAME="notebook-spark-ui"
5459

5560

docker/jupyter-notebook/startup.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
from IPython.display import *
77
from kubernetes import client, config
88

9-
print("Running startup script startup.py")
10-
119
# Initialize the GCS client
1210
storage_client = storage.Client()
1311

@@ -16,7 +14,7 @@
1614
bucket = storage_client.bucket(bucket_name)
1715

1816
# Ensure the local directory exists
19-
local_notebook_dir = "/home/jovyan/"
17+
local_notebook_dir = os.environ.get("HOME_DIR", "/home/jovyan")
2018
os.makedirs(local_notebook_dir, exist_ok=True)
2119

2220
# Sync from GCS to local
@@ -30,6 +28,9 @@
3028

3129
app_name = os.environ.get("APP_NAME", "PySpark Example")
3230
driver_host = "notebook-cluster-ip.spark-dev.svc.cluster.local"
31+
namespace = os.environ.get("NAMESPACE", "spark-dev")
32+
service_account = os.environ.get("SERVICE_ACCOUNT", "spark")
33+
executor_image = os.environ.get("EXECUTOR_IMAGE", "wenyixu101/spark:3.5.0-python3.11")
3334

3435
# Create a Spark session
3536
def create_spark():
@@ -43,10 +44,10 @@ def create_spark():
4344
.config("spark.executor.instances", "1") \
4445
.config("spark.executor.cores", "1") \
4546
.config("spark.executor.memory", "1g") \
46-
.config("spark.kubernetes.namespace", "spark-dev") \
47-
.config("spark.kubernetes.container.image", "wenyixu101/spark:3.5.0-python3.11") \
48-
.config("spark.kubernetes.authenticate.driver.serviceAccountName", "spark") \
49-
.config("spark.kubernetes.authenticate.executor.serviceAccountName", "spark") \
47+
.config("spark.kubernetes.namespace", namespace) \
48+
.config("spark.kubernetes.container.image", executor_image) \
49+
.config("spark.kubernetes.authenticate.driver.serviceAccountName", service_account) \
50+
.config("spark.kubernetes.authenticate.executor.serviceAccountName", service_account) \
5051
.config("spark.eventLog.enabled", "true") \
5152
.config("spark.eventLog.dir", f"gs://{bucket_name}/event-logs/") \
5253
.config("spark.history.fs.logDirectory", f"gs://{bucket_name}/event-logs/") \
@@ -65,8 +66,7 @@ def start():
6566
v1 = client.CoreV1Api()
6667

6768
# Fetching the service details
68-
service_name = "notebook-spark-ui"
69-
namespace = "spark-dev"
69+
service_name = os.environ.get("WEBUI_SERVICE_NAME", "notebook-spark-ui")
7070
service = v1.read_namespaced_service(service_name, namespace)
7171

7272
webui_host = service.status.load_balancer.ingress[0].ip

helm/data-platform/templates/notebook-deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ spec:
1515
serviceAccountName: spark
1616
containers:
1717
- name: notebook
18-
image: wenyixu101/all-spark-notebook:spark-3.5.0_25
18+
image: wenyixu101/all-spark-notebook:spark-3.5.0_26
1919
imagePullPolicy: Always
2020
command: ["/bin/bash", "-c", "start-notebook.sh"]
2121
ports:

0 commit comments

Comments (0)