Skip to content

Commit

Permalink
Add setting to tune spark
Browse files (browse the repository at this point in the history)
  • Loading branch information
stinodego committed Apr 5, 2024
1 parent 1c9d39f commit 3fd91a1
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
9 changes: 6 additions & 3 deletions queries/pyspark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@

def get_or_create_spark() -> SparkSession:
spark = (
SparkSession.builder.appName("spark_queries").master("local[*]").getOrCreate()
SparkSession.builder.appName("spark_queries")
.master("local[*]")
.config("spark.driver.memory", settings.run.spark_driver_memory)
.config("spark.executor.memory", settings.run.spark_executor_memory)
.config("spark.log.level", settings.run.spark_log_level)
.getOrCreate()
)
spark.sparkContext.setLogLevel(settings.run.spark_log_level)

return spark


Expand Down
5 changes: 4 additions & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ class Run(BaseSettings):
show_results: bool = False
check_results: bool = False # Only available for SCALE_FACTOR=1

spark_log_level: str = "ERROR"
polars_show_plan: bool = False
polars_streaming: bool = False

spark_driver_memory: str = "1g" # Tune as needed for optimal performance
spark_executor_memory: str = "1g" # Tune as needed for optimal performance
spark_log_level: str = "ERROR"

model_config = SettingsConfigDict(
env_prefix="run_", env_file=".env", extra="ignore"
)
Expand Down

0 comments on commit 3fd91a1

Please sign in to comment.