Skip to content

Commit

Permalink
Merge pull request #45 from xuwenyihust/44-avoid-variable-hardcoding-…
Browse files Browse the repository at this point in the history
…in-notebook-startuppy

Update Spark configuration and Docker image version
  • Loading branch information
xuwenyihust authored Jan 8, 2024
2 parents 61f8378 + 437806d commit 86657ee
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
5 changes: 5 additions & 0 deletions docker/jupyter-notebook/Dockerfile.notebook
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ ENV JUPYTER_CONFIG_DIR /home/jovyan/.jupyter/
# Add the JUPYTER_CONFIG_DIR to the PYTHONPATH
ENV PYTHONPATH "${PYTHONPATH}:${JUPYTER_CONFIG_DIR}"

# Deploy-time defaults consumed by startup.py via os.environ.get;
# each can be overridden on the container without rebuilding the image.
# Local directory the notebook syncs with the GCS bucket.
ENV HOME_DIR="/home/jovyan"
# GCS bucket holding notebooks and Spark event logs
# (presumably read as BUCKET_NAME by startup.py — the read is outside this hunk; verify).
ENV BUCKET_NAME="data-platform-bucket-20231126"
# Kubernetes namespace Spark executor pods are launched into.
ENV NAMESPACE="spark-dev"
# Service account used for Spark driver/executor authentication.
ENV SERVICE_ACCOUNT="spark"
# Container image for Spark executor pods.
ENV EXECUTOR_IMAGE="wenyixu101/spark:3.5.0-python3.11"
# Name of the Kubernetes service exposing the Spark Web UI.
ENV WEBUI_SERVICE_NAME="notebook-spark-ui"


18 changes: 9 additions & 9 deletions docker/jupyter-notebook/startup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from IPython.display import *
from kubernetes import client, config

print("Running startup script startup.py")

# Initialize the GCS client (uses application-default credentials;
# NOTE(review): requires the google-cloud-storage package from the base image)
storage_client = storage.Client()

Expand All @@ -16,7 +14,7 @@
bucket = storage_client.bucket(bucket_name)

# Ensure the local notebook directory exists.
# HOME_DIR is injected by the Dockerfile; fall back to the jovyan home
# directory used by the Jupyter base image when it is unset.
# (Removed a dead duplicate assignment that was immediately overwritten.)
local_notebook_dir = os.environ.get("HOME_DIR", "/home/jovyan")
os.makedirs(local_notebook_dir, exist_ok=True)

# Sync from GCS to local
Expand All @@ -30,6 +28,9 @@

# Spark connection settings — every value is overridable via the container
# environment; the defaults mirror the values baked into the Dockerfile.
app_name = os.environ.get("APP_NAME", "PySpark Example")
# Stable in-cluster DNS name of the notebook driver service. Parameterized
# for consistency with the other settings (avoids the last hard-coded value);
# the default preserves the previous behavior.
driver_host = os.environ.get("DRIVER_HOST", "notebook-cluster-ip.spark-dev.svc.cluster.local")
namespace = os.environ.get("NAMESPACE", "spark-dev")
service_account = os.environ.get("SERVICE_ACCOUNT", "spark")
executor_image = os.environ.get("EXECUTOR_IMAGE", "wenyixu101/spark:3.5.0-python3.11")

# Create a Spark session
def create_spark():
Expand All @@ -43,10 +44,10 @@ def create_spark():
.config("spark.executor.instances", "1") \
.config("spark.executor.cores", "1") \
.config("spark.executor.memory", "1g") \
.config("spark.kubernetes.namespace", "spark-dev") \
.config("spark.kubernetes.container.image", "wenyixu101/spark:3.5.0-python3.11") \
.config("spark.kubernetes.authenticate.driver.serviceAccountName", "spark") \
.config("spark.kubernetes.authenticate.executor.serviceAccountName", "spark") \
.config("spark.kubernetes.namespace", namespace) \
.config("spark.kubernetes.container.image", executor_image) \
.config("spark.kubernetes.authenticate.driver.serviceAccountName", service_account) \
.config("spark.kubernetes.authenticate.executor.serviceAccountName", service_account) \
.config("spark.eventLog.enabled", "true") \
.config("spark.eventLog.dir", f"gs://{bucket_name}/event-logs/") \
.config("spark.history.fs.logDirectory", f"gs://{bucket_name}/event-logs/") \
Expand All @@ -65,8 +66,7 @@ def start():
v1 = client.CoreV1Api()

# Fetching the service details
service_name = "notebook-spark-ui"
namespace = "spark-dev"
service_name = os.environ.get("WEBUI_SERVICE_NAME", "notebook-spark-ui")
service = v1.read_namespaced_service(service_name, namespace)

webui_host = service.status.load_balancer.ingress[0].ip
Expand Down
2 changes: 1 addition & 1 deletion helm/data-platform/templates/notebook-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
serviceAccountName: spark
containers:
- name: notebook
image: wenyixu101/all-spark-notebook:spark-3.5.0_25
image: wenyixu101/all-spark-notebook:spark-3.5.0_26
imagePullPolicy: Always
command: ["/bin/bash", "-c", "start-notebook.sh"]
ports:
Expand Down

0 comments on commit 86657ee

Please sign in to comment.