From ed26d42b90d34bb85396a00eb07c2b303e7a6732 Mon Sep 17 00:00:00 2001 From: Wenyi Xu Date: Tue, 10 Dec 2024 13:56:55 +0800 Subject: [PATCH] Enhance Spark session creation and configuration handling - Updated `create_spark` function in `startup.py` to accept an optional `notebook_path` parameter, allowing for dynamic configuration retrieval based on the notebook context. - Improved error handling in `create_spark` to log errors and use default Spark configuration when the API request fails. - Modified `SparkModel.js` to pass the `notebookPath` to the `create_spark` function, ensuring proper session initialization. - Cleaned up the demo notebook by removing outdated code cells, enhancing clarity and usability. --- docker/notebook/startup.py | 19 +++++--- examples/user_0@gmail.com/demo.ipynb | 65 ---------------------------- webapp/src/models/SparkModel.js | 2 +- 3 files changed, 13 insertions(+), 73 deletions(-) diff --git a/docker/notebook/startup.py b/docker/notebook/startup.py index 56cd569..cee0997 100644 --- a/docker/notebook/startup.py +++ b/docker/notebook/startup.py @@ -106,13 +106,21 @@ def _repr_html_(self): """ -def create_spark(): +def create_spark(notebook_path=None): logger.info("Creating Spark session") try: - config_json = requests.get("http://server:5002/spark_app/config").json() + if notebook_path: + config_json = requests.get(f"http://server:5002/spark_app/{notebook_path}/config").json() + else: + config_json = requests.get("http://server:5002/spark_app/config").json() except Exception as e: - config_json = 'Error loading config: ' + str(e) - + logger.error(f"Error loading config: {str(e)}. Using defaults.") config_json = { 'spark.executor.memory': '1g', 'spark.executor.cores': 1, 'spark.executor.instances': 1 } + spark = PawMarkSparkSession( config_json, SparkSession.builder \ @@ -138,6 +146,3 @@ def create_spark(): if ip is not None: # Add to global namespace ip.user_global_ns['create_spark'] = create_spark - -# Don't create spark instance by default -# Remove or comment out: spark = create_spark_dev() \ No newline at end of file diff --git a/examples/user_0@gmail.com/demo.ipynb b/examples/user_0@gmail.com/demo.ipynb index 286c381..6e7c2aa 100644 --- a/examples/user_0@gmail.com/demo.ipynb +++ b/examples/user_0@gmail.com/demo.ipynb @@ -10,71 +10,6 @@ "- This is just a demo notebook\n", "- For testing only" ] - }, - { - "cell_type": "code", - "isExecuted": false, - "lastExecutionResult": "success", - "lastExecutionTime": "2024-12-10 03:27:50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "        <div>
\n", - "

SparkSession - in-memory

\n", - " \n", - "
\n", - "

SparkContext

\n", - "\n", - "

Spark UI

\n", - "\n", - "
\n", - "
Version
\n", - "
v3.5.0
\n", - "
Master
\n", - "
spark://spark-master:7077
\n", - "
AppName
\n", - "
spark-1733801092185
\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "from pyspark.sql import SparkSession\n", - "\n", - "spark = SparkSession.builder\\\n", - " .appName(\"spark-1733801270245\")\\\n", - " .master(\"spark://spark-master:7077\")\\\n", - " .config(\"spark.jars.packages\", \"io.delta:delta-spark_2.12:3.0.0\")\\\n", - " .config(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\")\\\n", - " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\\\n", - " .config(\"spark.eventLog.enabled\", \"true\")\\\n", - " .config(\"spark.eventLog.dir\", \"/opt/data/spark-events\")\\\n", - " .config(\"spark.history.fs.logDirectory\", \"/opt/data/spark-events\")\\\n", - " .config(\"spark.sql.warehouse.dir\", \"/opt/data/spark-warehouse\")\\\n", - " .config(\"spark.executor.memory\", \"1g\")\\\n", - " .config(\"spark.executor.cores\", 1)\\\n", - " .config(\"spark.executor.instances\", 1)\\\n", - " .config(\"spark.driver.memory\", \"1g\")\\\n", - " .config(\"spark.driver.cores\", 1)\\\n", - " .getOrCreate()\n", - "\n", - "spark\n" - ] } ], "metadata": { diff --git a/webapp/src/models/SparkModel.js b/webapp/src/models/SparkModel.js index 3990c7b..8232c67 100644 --- a/webapp/src/models/SparkModel.js +++ b/webapp/src/models/SparkModel.js @@ -61,7 +61,7 @@ class SparkModel { const sparkAppId = `spark-${Date.now()}`; // Create a cell with Spark initialization code that uses the existing spark instance - const sparkInitCode = `spark = create_spark() + const sparkInitCode = `spark = create_spark("${notebookPath}") spark`; // Create the Spark session with this config