Refactor Spark configuration keys in create_spark function
Updated the configuration keys in the create_spark function in startup.py to follow Spark's naming convention, renaming 'executor_memory', 'executor_cores', and 'executor_instances' to 'spark.executor.memory', 'spark.executor.cores', and 'spark.executor.instances', and updated the corresponding references in the session-creation logic. Spark only honors properties carrying the 'spark.' prefix, so the previous unprefixed .config("executor.memory", ...) and .config("executor.cores", ...) calls were effectively ignored.
xuwenyihust committed Dec 10, 2024
1 parent ec1ee73 commit 4922bd8
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions docker/notebook/startup.py
@@ -116,9 +116,9 @@ def create_spark(notebook_path=None):
     except Exception as e:
         logger.error(f"Error loading config: {str(e)}. Using defaults.")
         config_json = {
-            'executor_memory': '1g',
-            'executor_cores': 1,
-            'executor_instances': 1
+            'spark.executor.memory': '1g',
+            'spark.executor.cores': 1,
+            'spark.executor.instances': 1
         }
 
     spark = PawMarkSparkSession(
@@ -133,11 +133,11 @@
             .config("spark.eventLog.dir", "/opt/data/spark-events") \
             .config("spark.history.fs.logDirectory", "/opt/data/spark-events") \
             .config("spark.sql.warehouse.dir", "/opt/data/spark-warehouse") \
-            .config("executor.memory", config_json['executor_memory']) \
-            .config("executor.cores", config_json['executor_cores']) \
-            .config("spark.executor.instances", config_json['executor_instances']) \
+            .config("spark.executor.memory", config_json['spark.executor.memory']) \
+            .config("spark.executor.cores", config_json['spark.executor.cores']) \
+            .config("spark.executor.instances", config_json['spark.executor.instances']) \
             .getOrCreate()
         )
     )
 
     return spark
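For context, a minimal self-contained sketch of the corrected pattern, using the plain pyspark API rather than the repo's PawMarkSparkSession wrapper (the app name "config-demo" is made up for this sketch; this is an illustration of the spark.* key convention, not the repo's actual startup path):

from pyspark.sql import SparkSession

# Defaults keyed by the canonical Spark property names, as in the commit.
config_json = {
    'spark.executor.memory': '1g',
    'spark.executor.cores': 1,
    'spark.executor.instances': 1
}

# "config-demo" is a hypothetical app name for this sketch.
builder = SparkSession.builder.appName("config-demo")
for key, value in config_json.items():
    # The keys already follow the spark.* convention, so they can be
    # applied verbatim; an unprefixed key such as "executor.memory"
    # would be stored in the conf but never consulted for executor sizing.
    builder = builder.config(key, value)

spark = builder.getOrCreate()
print(spark.conf.get("spark.executor.memory"))  # prints: 1g

Keying the defaults dict by the Spark property names themselves also removes the translation layer between the JSON config and the .config() calls, which is presumably why the commit renames the dict keys rather than only the .config() targets.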
