6
6
from IPython .display import *
7
7
from kubernetes import client , config
8
8
9
- print ("Running startup script startup.py" )
10
-
11
9
# Initialize the GCS client
12
10
storage_client = storage .Client ()
13
11
16
14
bucket = storage_client .bucket (bucket_name )
17
15
18
16
# Ensure the local directory exists
19
- local_notebook_dir = " /home/jovyan/"
17
+ local_notebook_dir = os . environ . get ( "HOME_DIR" , " /home/jovyan" )
20
18
os .makedirs (local_notebook_dir , exist_ok = True )
21
19
22
20
# Sync from GCS to local
30
28
31
29
app_name = os .environ .get ("APP_NAME" , "PySpark Example" )
32
30
driver_host = "notebook-cluster-ip.spark-dev.svc.cluster.local"
31
+ namespace = os .environ .get ("NAMESPACE" , "spark-dev" )
32
+ service_account = os .environ .get ("SERVICE_ACCOUNT" , "spark" )
33
+ executor_image = os .environ .get ("EXECUTOR_IMAGE" , "wenyixu101/spark:3.5.0-python3.11" )
33
34
34
35
# Create a Spark session
35
36
def create_spark ():
@@ -43,10 +44,10 @@ def create_spark():
43
44
.config ("spark.executor.instances" , "1" ) \
44
45
.config ("spark.executor.cores" , "1" ) \
45
46
.config ("spark.executor.memory" , "1g" ) \
46
- .config ("spark.kubernetes.namespace" , "spark-dev" ) \
47
- .config ("spark.kubernetes.container.image" , "wenyixu101/spark:3.5.0-python3.11" ) \
48
- .config ("spark.kubernetes.authenticate.driver.serviceAccountName" , "spark" ) \
49
- .config ("spark.kubernetes.authenticate.executor.serviceAccountName" , "spark" ) \
47
+ .config ("spark.kubernetes.namespace" , namespace ) \
48
+ .config ("spark.kubernetes.container.image" , executor_image ) \
49
+ .config ("spark.kubernetes.authenticate.driver.serviceAccountName" , service_account ) \
50
+ .config ("spark.kubernetes.authenticate.executor.serviceAccountName" , service_account ) \
50
51
.config ("spark.eventLog.enabled" , "true" ) \
51
52
.config ("spark.eventLog.dir" , f"gs://{ bucket_name } /event-logs/" ) \
52
53
.config ("spark.history.fs.logDirectory" , f"gs://{ bucket_name } /event-logs/" ) \
@@ -65,8 +66,7 @@ def start():
65
66
v1 = client .CoreV1Api ()
66
67
67
68
# Fetching the service details
68
- service_name = "notebook-spark-ui"
69
- namespace = "spark-dev"
69
+ service_name = os .environ .get ("WEBUI_SERVICE_NAME" , "notebook-spark-ui" )
70
70
service = v1 .read_namespaced_service (service_name , namespace )
71
71
72
72
webui_host = service .status .load_balancer .ingress [0 ].ip
0 commit comments