# Web-user.py
# Forked from spark-examples/pyspark-examples.
# Find the count of unique visitors to a website per day.
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import countDistinct
# Initialize Spark session (reuses an already-active session if one exists)
spark = SparkSession.builder.appName("UniqueVisitorsPerDay").getOrCreate()

try:
    # Sample data: one row per visit. Visitor 101 appears twice on 2023-01-01,
    # so a distinct count (2) differs from a plain row count (3) for that day.
    visitor_data = [
        Row(Date='2023-01-01', VisitorID=101),
        Row(Date='2023-01-01', VisitorID=102),
        Row(Date='2023-01-01', VisitorID=101),
        Row(Date='2023-01-02', VisitorID=103),
        Row(Date='2023-01-02', VisitorID=101),
    ]

    # Create DataFrame; column names come from the Row field names
    df_visitors = spark.createDataFrame(visitor_data)

    # Count unique visitors per day.
    # groupBy/agg does not guarantee any row order, so sort by Date to make
    # the printed table reproducible. Dates here are ISO-8601 strings, so
    # lexical order matches chronological order.
    unique_visitors = (
        df_visitors
        .groupBy('Date')
        .agg(countDistinct('VisitorID').alias('UniqueVisitors'))
        .orderBy('Date')
    )

    # Show results (expected for the sample data: 2 unique visitors each day)
    unique_visitors.show()
finally:
    # Always release the session's resources, even if a step above fails
    spark.stop()