From cf6759924a54c36f1b84f884a843d19c5ed3dc9b Mon Sep 17 00:00:00 2001
From: Douglas Moore
Date: Sat, 7 Oct 2023 14:15:35 -0400
Subject: [PATCH] Update quick-start.mdx

Add a working copy-paste example. It takes too long to read through 3-6
paragraphs and assemble everything; I know because I just spent an hour on
something that should have taken 2 minutes.
---
 src/pages/latest/quick-start.mdx | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/pages/latest/quick-start.mdx b/src/pages/latest/quick-start.mdx
index 757adbb..9d34304 100644
--- a/src/pages/latest/quick-start.mdx
+++ b/src/pages/latest/quick-start.mdx
@@ -9,6 +9,28 @@ This guide helps you quickly explore the main features of Delta Lake. It
 provides code snippets that show how to read from and write to Delta tables
 from interactive, batch, and streaming queries.
 
+## Copy paste example (assumes a Python environment is set up)
+```bash
+pip install pyspark==3.4.1 delta-spark==2.4.0
+pyspark --packages io.delta:delta-core_2.12:2.4.0 --conf "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension" --conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" << EOF
+
+import pyspark
+from delta import *
+
+builder = pyspark.sql.SparkSession.builder.appName("MyApp") \
+    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
+    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+
+spark = configure_spark_with_delta_pip(builder).getOrCreate()
+
+spark.sql("CREATE OR REPLACE TABLE mytable_delta(id BIGINT) USING DELTA")
+spark.range(5).write.format('delta').mode('append').saveAsTable("mytable_delta")
+spark.read.table("mytable_delta").show()
+spark.sql("DESCRIBE TABLE mytable_delta").show()
+
+EOF
+```
+
 ## Set up Apache Spark with Delta Lake
 
 Follow these instructions to set up Delta Lake with Spark. You can run the
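
For reference, the same snippet should also work as a standalone script run with
plain `python`, since `configure_spark_with_delta_pip` adds the matching Delta
package to `spark.jars.packages` without needing `--packages` on the command line.
A minimal sketch, assuming the arbitrary file name `quickstart.py`:

```python
# quickstart.py -- same setup as the heredoc above; run with: python quickstart.py
import pyspark
from delta import configure_spark_with_delta_pip

builder = (
    pyspark.sql.SparkSession.builder.appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# Adds the io.delta:delta-core artifact matching the installed delta-spark version.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

spark.sql("CREATE OR REPLACE TABLE mytable_delta(id BIGINT) USING DELTA")
spark.range(5).write.format("delta").mode("append").saveAsTable("mytable_delta")
spark.read.table("mytable_delta").show()
spark.sql("DESCRIBE TABLE mytable_delta").show()
```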