# Purchase.py (forked from spark-examples/pyspark-examples)
# Determine the first purchase date for each user.
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import min
# Initialize Spark session (getOrCreate reuses an active session if present)
spark = SparkSession.builder.appName("FirstPurchaseDate").getOrCreate()

# Sample data: one row per purchase, with PurchaseDate as a 'YYYY-MM-DD' string
purchase_data = [
    Row(UserID=1, PurchaseDate='2023-01-05'),
    Row(UserID=1, PurchaseDate='2023-01-10'),
    Row(UserID=2, PurchaseDate='2023-01-03'),
    Row(UserID=3, PurchaseDate='2023-01-12'),
]

# Create DataFrame; column names and types are inferred from the Row fields
df_purchases = spark.createDataFrame(purchase_data)

# Convert PurchaseDate from string to date type so the aggregation below
# compares dates rather than strings. The column is referenced through the
# DataFrame itself because `col` is not imported in this file (calling
# `col(...)` here raised NameError).
df_purchases = df_purchases.withColumn(
    "PurchaseDate", df_purchases["PurchaseDate"].cast("date")
)

# Find first purchase date for each user. NOTE: `min` is the Spark aggregate
# imported from pyspark.sql.functions at the top of the file, not Python's
# builtin min.
first_purchase = df_purchases.groupBy("UserID").agg(
    min("PurchaseDate").alias("FirstPurchaseDate")
)

# Show results (prints one row per UserID with its FirstPurchaseDate)
first_purchase.show()